diff --git a/wasmrt.nim b/wasmrt.nim index eb046b4..fe39698 100644 --- a/wasmrt.nim +++ b/wasmrt.nim @@ -679,12 +679,21 @@ proc mmap(a: pointer, len: csize_t, prot, flags, fildes: cint, off: int): pointe wasmAlloc(len) +proc malloc(sz: csize_t): pointer {.exportc.} = alloc(sz) +proc free(p: pointer) {.exportc.} = dealloc(p) + # Suppress __wasm_call_ctors # https://stackoverflow.com/questions/72568387/why-is-an-objects-constructor-being-called-in-every-exported-wasm-function proc initialize() {.stackTrace: off, exportc: "_initialize", codegenDecl: wasmExportCodegenDecl.} = proc ctors() {.importc: "__wasm_call_ctors".} ctors() +when compileOption("stackTrace"): + {.push stackTrace: off.} + proc wasmStackTrace() {.exportwasm.} = + writeStackTrace() + {.pop.} + when not defined(gcDestructors): GC_disable() diff --git a/wasmrt/libc.nim b/wasmrt/libc.nim index f9a057f..cf7d0cd 100644 --- a/wasmrt/libc.nim +++ b/wasmrt/libc.nim @@ -1,7 +1,19 @@ -import posix +import posix, strutils, os import ../wasmrt +const + builtinsPath = currentSourcePath.rsplit({DirSep, AltSep}, 1)[0] & + "/llvm-builtins/builtins/" + +template c(s: string) = + {.compile: builtinsPath & s.} + +# TODO: Extend the following list as needed +c "multi3.c" +c "lshrti3.c" +c "ashrti3.c" + proc gettimeImpl(): cint {.importwasmf: "Date.now".} proc clock_gettime(clkId: Clockid, tp: var Timespec): cint {.exportc.} = diff --git a/wasmrt/llvm-builtins/README.md b/wasmrt/llvm-builtins/README.md new file mode 100644 index 0000000..7e8144b --- /dev/null +++ b/wasmrt/llvm-builtins/README.md @@ -0,0 +1,4 @@ +The builtins directory in this dir is a copy of +https://github.com/llvm/llvm-project/tree/main/compiler-rt/lib/builtins +of tag llvmorg-17.0.4 + diff --git a/wasmrt/llvm-builtins/builtins/CMakeLists.txt b/wasmrt/llvm-builtins/builtins/CMakeLists.txt new file mode 100644 index 0000000..d62fa04 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/CMakeLists.txt @@ -0,0 +1,939 @@ +# This directory contains a large
amount of C code which provides +# generic implementations of the core runtime library along with optimized +# architecture-specific code in various subdirectories. + +if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + cmake_minimum_required(VERSION 3.20.0) + + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + project(CompilerRTBuiltins C ASM) + set(COMPILER_RT_STANDALONE_BUILD TRUE) + set(COMPILER_RT_BUILTINS_STANDALONE_BUILD TRUE) + + set(COMPILER_RT_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") + + set(LLVM_COMMON_CMAKE_UTILS "${COMPILER_RT_SOURCE_DIR}/../cmake") + + # Add path for custom modules + list(INSERT CMAKE_MODULE_PATH 0 + "${COMPILER_RT_SOURCE_DIR}/cmake" + "${COMPILER_RT_SOURCE_DIR}/cmake/Modules" + "${LLVM_COMMON_CMAKE_UTILS}" + "${LLVM_COMMON_CMAKE_UTILS}/Modules" + ) + + include(base-config-ix) + include(CompilerRTUtils) + + if (NOT LLVM_RUNTIMES_BUILD) + load_llvm_config() + endif() + construct_compiler_rt_default_triple() + + include(SetPlatformToolchainTools) + if(APPLE) + include(CompilerRTDarwinUtils) + endif() + if(APPLE) + include(UseLibtool) + endif() + include(AddCompilerRT) +endif() + +if (COMPILER_RT_STANDALONE_BUILD) + # When compiler-rt is being built standalone, possibly as a cross-compilation + # target, the target may or may not want position independent code. This + # option provides an avenue through which the flag may be controlled when an + # LLVM configuration is not being utilized. + option(COMPILER_RT_BUILTINS_ENABLE_PIC + "Turns on or off -fPIC for the builtin library source" + ON) +endif() + +include(builtin-config-ix) +include(CMakeDependentOption) +include(CMakePushCheckState) + +option(COMPILER_RT_BUILTINS_HIDE_SYMBOLS + "Do not export any symbols from the static library." ON) + +# TODO: Need to add a mechanism for logging errors when builtin source files are +# added to a sub-directory and not this CMakeLists file. 
+set(GENERIC_SOURCES + absvdi2.c + absvsi2.c + absvti2.c + adddf3.c + addsf3.c + addvdi3.c + addvsi3.c + addvti3.c + apple_versioning.c + ashldi3.c + ashlti3.c + ashrdi3.c + ashrti3.c + bswapdi2.c + bswapsi2.c + clzdi2.c + clzsi2.c + clzti2.c + cmpdi2.c + cmpti2.c + comparedf2.c + comparesf2.c + ctzdi2.c + ctzsi2.c + ctzti2.c + divdc3.c + divdf3.c + divdi3.c + divmoddi4.c + divmodsi4.c + divmodti4.c + divsc3.c + divsf3.c + divsi3.c + divti3.c + extendsfdf2.c + extendhfsf2.c + ffsdi2.c + ffssi2.c + ffsti2.c + fixdfdi.c + fixdfsi.c + fixdfti.c + fixsfdi.c + fixsfsi.c + fixsfti.c + fixunsdfdi.c + fixunsdfsi.c + fixunsdfti.c + fixunssfdi.c + fixunssfsi.c + fixunssfti.c + floatdidf.c + floatdisf.c + floatsidf.c + floatsisf.c + floattidf.c + floattisf.c + floatundidf.c + floatundisf.c + floatunsidf.c + floatunsisf.c + floatuntidf.c + floatuntisf.c + fp_mode.c + int_util.c + lshrdi3.c + lshrti3.c + moddi3.c + modsi3.c + modti3.c + muldc3.c + muldf3.c + muldi3.c + mulodi4.c + mulosi4.c + muloti4.c + mulsc3.c + mulsf3.c + multi3.c + mulvdi3.c + mulvsi3.c + mulvti3.c + negdf2.c + negdi2.c + negsf2.c + negti2.c + negvdi2.c + negvsi2.c + negvti2.c + os_version_check.c + paritydi2.c + paritysi2.c + parityti2.c + popcountdi2.c + popcountsi2.c + popcountti2.c + powidf2.c + powisf2.c + subdf3.c + subsf3.c + subvdi3.c + subvsi3.c + subvti3.c + trampoline_setup.c + truncdfhf2.c + truncdfsf2.c + truncsfhf2.c + ucmpdi2.c + ucmpti2.c + udivdi3.c + udivmoddi4.c + udivmodsi4.c + udivmodti4.c + udivsi3.c + udivti3.c + umoddi3.c + umodsi3.c + umodti3.c +) + +# We only build BF16 files when "__bf16" is available. +set(BF16_SOURCES + truncdfbf2.c + truncsfbf2.c +) + +# TODO: Several "tf" files (and divtc3.c, but not multc3.c) are in +# GENERIC_SOURCES instead of here. 
+set(GENERIC_TF_SOURCES + addtf3.c + comparetf2.c + divtc3.c + divtf3.c + extenddftf2.c + extendhftf2.c + extendsftf2.c + fixtfdi.c + fixtfsi.c + fixtfti.c + fixunstfdi.c + fixunstfsi.c + fixunstfti.c + floatditf.c + floatsitf.c + floattitf.c + floatunditf.c + floatunsitf.c + floatuntitf.c + multc3.c + multf3.c + powitf2.c + subtf3.c + trunctfdf2.c + trunctfhf2.c + trunctfsf2.c +) + +option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN + "Skip the atomic builtin (these should normally be provided by a shared library)" + On) + +if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + emutls.c + enable_execute_stack.c + eprintf.c + ) +endif() + +if(COMPILER_RT_HAS_ATOMIC_KEYWORD AND NOT COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + atomic.c + ) +endif() + +if(APPLE) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + atomic_flag_clear.c + atomic_flag_clear_explicit.c + atomic_flag_test_and_set.c + atomic_flag_test_and_set_explicit.c + atomic_signal_fence.c + atomic_thread_fence.c + ) +endif() + +if (HAVE_UNWIND_H) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + gcc_personality_v0.c + ) +endif () + +if (NOT FUCHSIA) + set(GENERIC_SOURCES + ${GENERIC_SOURCES} + clear_cache.c + ) +endif() + +# These files are used on 32-bit and 64-bit x86. +set(x86_ARCH_SOURCES + cpu_model.c + ) + +if (NOT MSVC) + set(x86_ARCH_SOURCES + ${x86_ARCH_SOURCES} + i386/fp_mode.c + ) +endif () + +# Implement extended-precision builtins, assuming long double is 80 bits. +# long double is not 80 bits on Android or MSVC. 
+set(x86_80_BIT_SOURCES + divxc3.c + fixxfdi.c + fixxfti.c + fixunsxfdi.c + fixunsxfsi.c + fixunsxfti.c + floatdixf.c + floattixf.c + floatundixf.c + floatuntixf.c + mulxc3.c + powixf2.c +) + +if (NOT MSVC) + set(x86_64_SOURCES + ${GENERIC_SOURCES} + ${GENERIC_TF_SOURCES} + ${x86_ARCH_SOURCES} + x86_64/floatdidf.c + x86_64/floatdisf.c + ) + if (NOT WIN32) + set(x86_64_SOURCES + ${x86_64_SOURCES} + x86_64/floatundidf.S + x86_64/floatundisf.S + ) + endif() + + if (NOT ANDROID) + set(x86_64_SOURCES + ${x86_64_SOURCES} + ${x86_80_BIT_SOURCES} + x86_64/floatdixf.c + ) + if (NOT WIN32) + set(x86_64_SOURCES + ${x86_64_SOURCES} + x86_64/floatundixf.S + ) + endif() + endif() + + # Darwin x86_64 Haswell + set(x86_64h_SOURCES ${x86_64_SOURCES}) + + if (WIN32) + set(x86_64_SOURCES + ${x86_64_SOURCES} + x86_64/chkstk.S + x86_64/chkstk2.S + ) + endif() + + set(i386_SOURCES + ${GENERIC_SOURCES} + ${x86_ARCH_SOURCES} + i386/ashldi3.S + i386/ashrdi3.S + i386/divdi3.S + i386/floatdidf.S + i386/floatdisf.S + i386/floatundidf.S + i386/floatundisf.S + i386/lshrdi3.S + i386/moddi3.S + i386/muldi3.S + i386/udivdi3.S + i386/umoddi3.S + ) + + if (NOT ANDROID) + set(i386_SOURCES + ${i386_SOURCES} + ${x86_80_BIT_SOURCES} + i386/floatdixf.S + i386/floatundixf.S + ) + endif() + + if (WIN32) + set(i386_SOURCES + ${i386_SOURCES} + i386/chkstk.S + i386/chkstk2.S + ) + endif() +else () # MSVC + # Use C versions of functions when building on MSVC + # MSVC's assembler takes Intel syntax, not AT&T syntax. + # Also use only MSVC compilable builtin implementations. 
+ set(x86_64_SOURCES + ${GENERIC_SOURCES} + ${x86_ARCH_SOURCES} + x86_64/floatdidf.c + x86_64/floatdisf.c + ) + set(i386_SOURCES ${GENERIC_SOURCES} ${x86_ARCH_SOURCES}) +endif () # if (NOT MSVC) + + +# builtin support for Targets that have Arm state or have Thumb2 +set(arm_or_thumb2_base_SOURCES + arm/fp_mode.c + arm/bswapdi2.S + arm/bswapsi2.S + arm/clzdi2.S + arm/clzsi2.S + arm/comparesf2.S + arm/divmodsi4.S + arm/divsi3.S + arm/modsi3.S + arm/udivmodsi4.S + arm/udivsi3.S + arm/umodsi3.S + ${GENERIC_SOURCES} +) + +set(arm_sync_SOURCES + arm/sync_fetch_and_add_4.S + arm/sync_fetch_and_add_8.S + arm/sync_fetch_and_and_4.S + arm/sync_fetch_and_and_8.S + arm/sync_fetch_and_max_4.S + arm/sync_fetch_and_max_8.S + arm/sync_fetch_and_min_4.S + arm/sync_fetch_and_min_8.S + arm/sync_fetch_and_nand_4.S + arm/sync_fetch_and_nand_8.S + arm/sync_fetch_and_or_4.S + arm/sync_fetch_and_or_8.S + arm/sync_fetch_and_sub_4.S + arm/sync_fetch_and_sub_8.S + arm/sync_fetch_and_umax_4.S + arm/sync_fetch_and_umax_8.S + arm/sync_fetch_and_umin_4.S + arm/sync_fetch_and_umin_8.S + arm/sync_fetch_and_xor_4.S + arm/sync_fetch_and_xor_8.S +) + +# builtin support for Thumb-only targets with very limited Thumb2 technology, +# such as v6-m and v8-m.baseline +set(thumb1_base_SOURCES + arm/divsi3.S + arm/udivsi3.S + arm/comparesf2.S + arm/addsf3.S + ${GENERIC_SOURCES} +) + +set(arm_EABI_SOURCES + arm/aeabi_cdcmp.S + arm/aeabi_cdcmpeq_check_nan.c + arm/aeabi_cfcmp.S + arm/aeabi_cfcmpeq_check_nan.c + arm/aeabi_dcmp.S + arm/aeabi_div0.c + arm/aeabi_drsub.c + arm/aeabi_fcmp.S + arm/aeabi_frsub.c + arm/aeabi_idivmod.S + arm/aeabi_ldivmod.S + arm/aeabi_memcmp.S + arm/aeabi_memcpy.S + arm/aeabi_memmove.S + arm/aeabi_memset.S + arm/aeabi_uidivmod.S + arm/aeabi_uldivmod.S +) + +set(arm_Thumb1_JT_SOURCES + arm/switch16.S + arm/switch32.S + arm/switch8.S + arm/switchu8.S +) +set(arm_Thumb1_SjLj_EH_SOURCES + arm/restore_vfp_d8_d15_regs.S + arm/save_vfp_d8_d15_regs.S +) +set(arm_Thumb1_VFPv2_DP_SOURCES + 
arm/adddf3vfp.S + arm/divdf3vfp.S + arm/eqdf2vfp.S + arm/extendsfdf2vfp.S + arm/fixdfsivfp.S + arm/fixunsdfsivfp.S + arm/floatsidfvfp.S + arm/floatunssidfvfp.S + arm/gedf2vfp.S + arm/gtdf2vfp.S + arm/ledf2vfp.S + arm/ltdf2vfp.S + arm/muldf3vfp.S + arm/nedf2vfp.S + arm/negdf2vfp.S + arm/subdf3vfp.S + arm/truncdfsf2vfp.S + arm/unorddf2vfp.S +) +set(arm_Thumb1_VFPv2_SP_SOURCES + arm/addsf3vfp.S + arm/divsf3vfp.S + arm/eqsf2vfp.S + arm/fixsfsivfp.S + arm/fixunssfsivfp.S + arm/floatsisfvfp.S + arm/floatunssisfvfp.S + arm/gesf2vfp.S + arm/gtsf2vfp.S + arm/lesf2vfp.S + arm/ltsf2vfp.S + arm/mulsf3vfp.S + arm/negsf2vfp.S + arm/nesf2vfp.S + arm/subsf3vfp.S + arm/unordsf2vfp.S +) +set(arm_Thumb1_icache_SOURCES + arm/sync_synchronize.S +) + +# thumb1 calling into Arm to cover support +set(arm_Thumb1_SOURCES + ${arm_Thumb1_JT_SOURCES} + ${arm_Thumb1_SjLj_EH_SOURCES} + ${arm_Thumb1_VFPv2_DP_SOURCES} + ${arm_Thumb1_VFPv2_SP_SOURCES} + ${arm_Thumb1_icache_SOURCES} +) + +# base functionality for Arm Targets prior to Arm v7-a and Armv6-m such as v6, +# v5t, v4t +set(arm_min_SOURCES + ${arm_or_thumb2_base_SOURCES} + ${arm_EABI_SOURCES} +) + +if(MINGW) + set(arm_SOURCES + arm/aeabi_idivmod.S + arm/aeabi_ldivmod.S + arm/aeabi_uidivmod.S + arm/aeabi_uldivmod.S + arm/chkstk.S + ${arm_or_thumb2_base_SOURCES} + ${arm_sync_SOURCES} + ) + + set(thumb1_SOURCES + ${thumb1_base_SOURCES} + ) +elseif(NOT WIN32) + # TODO the EABI sources should only be added to EABI targets + set(arm_SOURCES + ${arm_or_thumb2_base_SOURCES} + ${arm_sync_SOURCES} + ${arm_EABI_SOURCES} + ${arm_Thumb1_SOURCES} + ) + + set(thumb1_SOURCES + ${thumb1_base_SOURCES} + ${arm_EABI_SOURCES} + ) +endif() + +set(aarch64_SOURCES + ${GENERIC_TF_SOURCES} + ${GENERIC_SOURCES} + cpu_model.c + aarch64/fp_mode.c +) + +# Generate outline atomics helpers from lse.S base +set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir") +file(MAKE_DIRECTORY "${OA_HELPERS_DIR}") + +if(CMAKE_HOST_UNIX) + 
set(COMPILER_RT_LINK_OR_COPY create_symlink) +else() + set(COMPILER_RT_LINK_OR_COPY copy) +endif() + +foreach(pat cas swp ldadd ldclr ldeor ldset) + foreach(size 1 2 4 8 16) + foreach(model 1 2 3 4 5) + if(pat STREQUAL "cas" OR NOT size STREQUAL "16") + set(helper_asm "${OA_HELPERS_DIR}/outline_atomic_${pat}${size}_${model}.S") + list(APPEND lse_builtins "${helper_asm}") + list(APPEND arm64_lse_commands COMMAND ${CMAKE_COMMAND} -E ${COMPILER_RT_LINK_OR_COPY} "${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S" "${helper_asm}") + set_source_files_properties("${helper_asm}" + PROPERTIES + COMPILE_DEFINITIONS "L_${pat};SIZE=${size};MODEL=${model}" + INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}" + ) + list(APPEND aarch64_SOURCES "${helper_asm}") + endif() + endforeach(model) + endforeach(size) +endforeach(pat) + +if (MINGW) + set(aarch64_SOURCES + ${aarch64_SOURCES} + aarch64/chkstk.S + ) +endif() + +set(armv4t_SOURCES ${arm_min_SOURCES}) +set(armv5te_SOURCES ${arm_min_SOURCES}) +set(armv6_SOURCES ${arm_min_SOURCES}) +set(armhf_SOURCES ${arm_SOURCES}) +set(armv7_SOURCES ${arm_SOURCES}) +set(armv7s_SOURCES ${arm_SOURCES}) +set(armv7k_SOURCES ${arm_SOURCES}) +set(arm64_SOURCES ${aarch64_SOURCES}) +set(arm64e_SOURCES ${aarch64_SOURCES}) +set(arm64_32_SOURCES ${aarch64_SOURCES}) + +# macho_embedded archs +set(armv6m_SOURCES ${thumb1_SOURCES}) +set(armv7m_SOURCES ${arm_SOURCES}) +set(armv7em_SOURCES ${arm_SOURCES}) +set(armv8m.base_SOURCES ${thumb1_SOURCES}) +set(armv8m.main_SOURCES ${arm_SOURCES}) +set(armv8.1m.main_SOURCES ${arm_SOURCES}) + +# 8-bit AVR MCU +set(avr_SOURCES + avr/mulqi3.S + avr/mulhi3.S + avr/exit.S + avr/divmodhi4.S + avr/udivmodhi4.S + avr/divmodqi4.S + avr/udivmodqi4.S + ${GENERIC_SOURCES} +) + +# hexagon arch +set(hexagon_SOURCES + hexagon/common_entry_exit_abi1.S + hexagon/common_entry_exit_abi2.S + hexagon/common_entry_exit_legacy.S + hexagon/dfaddsub.S + hexagon/dfdiv.S + hexagon/dffma.S + hexagon/dfminmax.S + hexagon/dfmul.S + hexagon/dfsqrt.S + 
hexagon/divdi3.S + hexagon/divsi3.S + hexagon/fastmath2_dlib_asm.S + hexagon/fastmath2_ldlib_asm.S + hexagon/fastmath_dlib_asm.S + hexagon/memcpy_forward_vp4cp4n2.S + hexagon/memcpy_likely_aligned.S + hexagon/moddi3.S + hexagon/modsi3.S + hexagon/sfdiv_opt.S + hexagon/sfsqrt_opt.S + hexagon/udivdi3.S + hexagon/udivmoddi4.S + hexagon/udivmodsi4.S + hexagon/udivsi3.S + hexagon/umoddi3.S + hexagon/umodsi3.S + ${GENERIC_SOURCES} + ${GENERIC_TF_SOURCES} +) + +set(loongarch_SOURCES + loongarch/fp_mode.c + ${GENERIC_SOURCES} + ${GENERIC_TF_SOURCES} +) +set(loongarch64_SOURCES + ${loongarch_SOURCES} +) + +set(mips_SOURCES ${GENERIC_SOURCES}) +set(mipsel_SOURCES ${mips_SOURCES}) +set(mips64_SOURCES ${GENERIC_TF_SOURCES} + ${mips_SOURCES}) +set(mips64el_SOURCES ${GENERIC_TF_SOURCES} + ${mips_SOURCES}) + +set(powerpc_SOURCES ${GENERIC_SOURCES}) + +set(powerpcspe_SOURCES ${GENERIC_SOURCES}) + +set(powerpc64_SOURCES + ppc/divtc3.c + ppc/fixtfdi.c + ppc/fixunstfdi.c + ppc/floatditf.c + ppc/floatunditf.c + ppc/gcc_qadd.c + ppc/gcc_qdiv.c + ppc/gcc_qmul.c + ppc/gcc_qsub.c + ppc/multc3.c + ${GENERIC_SOURCES} +) +# These routines require __int128, which isn't supported on AIX. 
+if (NOT OS_NAME MATCHES "AIX") + set(powerpc64_SOURCES + ppc/floattitf.c + ppc/fixtfti.c + ppc/fixunstfti.c + ${powerpc64_SOURCES} + ) +endif() +set(powerpc64le_SOURCES ${powerpc64_SOURCES}) + +set(riscv_SOURCES + riscv/fp_mode.c + riscv/save.S + riscv/restore.S + ${GENERIC_SOURCES} + ${GENERIC_TF_SOURCES} +) +set(riscv32_SOURCES + riscv/mulsi3.S + ${riscv_SOURCES} +) +set(riscv64_SOURCES + riscv/muldi3.S + ${riscv_SOURCES} +) + +set(sparc_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES}) +set(sparcv9_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES}) + +set(wasm32_SOURCES + ${GENERIC_TF_SOURCES} + ${GENERIC_SOURCES} +) +set(wasm64_SOURCES + ${GENERIC_TF_SOURCES} + ${GENERIC_SOURCES} +) + +set(ve_SOURCES + ve/grow_stack.S + ve/grow_stack_align.S + ${GENERIC_TF_SOURCES} + ${GENERIC_SOURCES}) + +add_custom_target(builtins) +set_target_properties(builtins PROPERTIES FOLDER "Compiler-RT Misc") + +option(COMPILER_RT_ENABLE_SOFTWARE_INT128 + "Enable the int128 builtin routines for all targets." + OFF) + +if (APPLE) + add_subdirectory(Darwin-excludes) + add_subdirectory(macho_embedded) + darwin_add_builtin_libraries(${BUILTIN_SUPPORTED_OS}) +else () + set(BUILTIN_CFLAGS "") + add_security_warnings(BUILTIN_CFLAGS 0) + + if (COMPILER_RT_HAS_FCF_PROTECTION_FLAG) + append_list_if(COMPILER_RT_ENABLE_CET -fcf-protection=full BUILTIN_CFLAGS) + endif() + + append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 BUILTIN_CFLAGS) + + # These flags would normally be added to CMAKE_C_FLAGS by the llvm + # cmake step. Add them manually if this is a standalone build. 
+ if(COMPILER_RT_STANDALONE_BUILD) + if(COMPILER_RT_BUILTINS_ENABLE_PIC) + append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC BUILTIN_CFLAGS) + endif() + append_list_if(COMPILER_RT_HAS_FNO_BUILTIN_FLAG -fno-builtin BUILTIN_CFLAGS) + if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS) + append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -fvisibility=hidden BUILTIN_CFLAGS) + endif() + if(NOT COMPILER_RT_DEBUG) + append_list_if(COMPILER_RT_HAS_OMIT_FRAME_POINTER_FLAG -fomit-frame-pointer BUILTIN_CFLAGS) + endif() + endif() + + set(BUILTIN_DEFS "") + + if(COMPILER_RT_BUILTINS_HIDE_SYMBOLS) + append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN BUILTIN_DEFS) + endif() + + if(COMPILER_RT_DISABLE_AARCH64_FMV) + list(APPEND BUILTIN_DEFS DISABLE_AARCH64_FMV) + endif() + + append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS) + + foreach (arch ${BUILTIN_SUPPORTED_ARCH}) + if (CAN_TARGET_${arch}) + cmake_push_check_state() + # TODO: we should probably make most of the checks in builtin-config depend on the target flags. 
+ message(STATUS "Performing additional configure checks with target flags: ${TARGET_${arch}_CFLAGS}") + set(BUILTIN_CFLAGS_${arch} ${BUILTIN_CFLAGS}) + list(APPEND CMAKE_REQUIRED_FLAGS ${TARGET_${arch}_CFLAGS} ${BUILTIN_CFLAGS_${arch}}) + # For ARM archs, exclude any VFP builtins if VFP is not supported + if (${arch} MATCHES "^(arm|armhf|armv7|armv7s|armv7k|armv7m|armv7em|armv8m.main|armv8.1m.main)$") + string(REPLACE ";" " " _TARGET_${arch}_CFLAGS "${TARGET_${arch}_CFLAGS}") + check_compile_definition(__ARM_FP "${CMAKE_C_FLAGS} ${_TARGET_${arch}_CFLAGS}" COMPILER_RT_HAS_${arch}_VFP) + if(NOT COMPILER_RT_HAS_${arch}_VFP) + list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_DP_SOURCES} ${arm_Thumb1_VFPv2_SP_SOURCES} ${arm_Thumb1_SjLj_EH_SOURCES}) + else() + # Exclude any double-precision builtins if VFP is single-precision-only + try_compile_only(COMPILER_RT_HAS_${arch}_VFP_DP + SOURCE "#if !(__ARM_FP & 0x8) + #error No double-precision support! + #endif + int main(void) { return 0; }") + if(NOT COMPILER_RT_HAS_${arch}_VFP_DP) + list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_DP_SOURCES}) + endif() + endif() + endif() + check_c_source_compiles("_Float16 foo(_Float16 x) { return x; }" + COMPILER_RT_HAS_${arch}_FLOAT16) + append_list_if(COMPILER_RT_HAS_${arch}_FLOAT16 -DCOMPILER_RT_HAS_FLOAT16 BUILTIN_CFLAGS_${arch}) + check_c_source_compiles("__bf16 foo(__bf16 x) { return x; }" + COMPILER_RT_HAS_${arch}_BFLOAT16) + # Build BF16 files only when "__bf16" is available. + if(COMPILER_RT_HAS_${arch}_BFLOAT16) + list(APPEND ${arch}_SOURCES ${BF16_SOURCES}) + endif() + + # Remove a generic C builtin when an arch-specific builtin is specified. + filter_builtin_sources(${arch}_SOURCES ${arch}) + + # Needed for clear_cache on debug mode, due to r7's usage in inline asm. + # Release mode already sets it via -O2/3, Debug mode doesn't. 
+ if (${arch} STREQUAL "armhf") + list(APPEND BUILTIN_CFLAGS_${arch} -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET) + endif() + + # For RISCV32, we must force enable int128 for compiling long + # double routines. + if(COMPILER_RT_ENABLE_SOFTWARE_INT128 OR "${arch}" STREQUAL "riscv32") + list(APPEND BUILTIN_CFLAGS_${arch} -fforce-enable-int128) + endif() + + if(arch STREQUAL "aarch64") + add_custom_target( + lse_builtin_symlinks + BYPRODUCTS ${lse_builtins} + ${arm64_lse_commands} + ) + + set(deps_aarch64 lse_builtin_symlinks) + endif() + + add_compiler_rt_runtime(clang_rt.builtins + STATIC + ARCHS ${arch} + DEPS ${deps_${arch}} + SOURCES ${${arch}_SOURCES} + DEFS ${BUILTIN_DEFS} + CFLAGS ${BUILTIN_CFLAGS_${arch}} + PARENT_TARGET builtins) + cmake_pop_check_state() + endif () + endforeach () +endif () + +add_dependencies(compiler-rt builtins) + +option(COMPILER_RT_BUILD_STANDALONE_LIBATOMIC + "Build standalone shared atomic library." + OFF) + +if(COMPILER_RT_BUILD_STANDALONE_LIBATOMIC) + add_custom_target(builtins-standalone-atomic) + set(BUILTIN_TYPE SHARED) + if(OS_NAME MATCHES "AIX") + include(CompilerRTAIXUtils) + if(NOT COMPILER_RT_LIBATOMIC_LINK_FLAGS) + get_aix_libatomic_default_link_flags(COMPILER_RT_LIBATOMIC_LINK_FLAGS + "${CMAKE_CURRENT_SOURCE_DIR}/ppc/atomic.exp") + endif() + # For different versions of cmake, SHARED behaves differently. For some + # versions, we might need MODULE rather than SHARED. + get_aix_libatomic_type(BUILTIN_TYPE) + else() + list(APPEND COMPILER_RT_LIBATOMIC_LINK_FLAGS -nodefaultlibs) + endif() + foreach (arch ${BUILTIN_SUPPORTED_ARCH}) + if(CAN_TARGET_${arch}) + add_compiler_rt_runtime(clang_rt.atomic + ${BUILTIN_TYPE} + ARCHS ${arch} + SOURCES atomic.c + LINK_FLAGS ${COMPILER_RT_LIBATOMIC_LINK_FLAGS} + DEPS builtins + PARENT_TARGET builtins-standalone-atomic) + endif() + endforeach() + # FIXME: On AIX, we have to archive built shared libraries into a static + # archive, i.e., libatomic.a. 
Once cmake adds support of such usage for AIX, + # this ad-hoc part can be removed. + if(OS_NAME MATCHES "AIX") + archive_aix_libatomic(clang_rt.atomic libatomic + ARCHS ${BUILTIN_SUPPORTED_ARCH} + PARENT_TARGET builtins-standalone-atomic) + endif() + add_dependencies(compiler-rt builtins-standalone-atomic) +endif() + +# TODO: COMPILER_RT_BUILD_CRT used to be a cached variable so we need to unset +# it first so cmake_dependent_option can set the local variable of the same +# name. This statement can be removed in the future. +unset(COMPILER_RT_BUILD_CRT CACHE) + +cmake_dependent_option(COMPILER_RT_BUILD_CRT "Build crtbegin.o/crtend.o" ON "COMPILER_RT_HAS_CRT" OFF) + +if (COMPILER_RT_BUILD_CRT) + add_compiler_rt_component(crt) + + option(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY "Use eh_frame in crtbegin.o/crtend.o" ON) + + include(CheckSectionExists) + check_section_exists(".init_array" COMPILER_RT_HAS_INITFINI_ARRAY + SOURCE "volatile int x;\n__attribute__((constructor)) void f(void) {x = 0;}\nint main(void) { return 0; }\n") + + append_list_if(COMPILER_RT_HAS_STD_C11_FLAG -std=c11 CRT_CFLAGS) + append_list_if(COMPILER_RT_HAS_INITFINI_ARRAY -DCRT_HAS_INITFINI_ARRAY CRT_CFLAGS) + append_list_if(COMPILER_RT_CRT_USE_EH_FRAME_REGISTRY -DEH_USE_FRAME_REGISTRY CRT_CFLAGS) + append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC CRT_CFLAGS) + append_list_if(COMPILER_RT_HAS_WNO_PEDANTIC -Wno-pedantic CRT_CFLAGS) + if (COMPILER_RT_HAS_FCF_PROTECTION_FLAG) + append_list_if(COMPILER_RT_ENABLE_CET -fcf-protection=full CRT_CFLAGS) + endif() + + foreach(arch ${BUILTIN_SUPPORTED_ARCH}) + add_compiler_rt_runtime(clang_rt.crtbegin + OBJECT + ARCHS ${arch} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/crtbegin.c + CFLAGS ${CRT_CFLAGS} + PARENT_TARGET crt) + add_compiler_rt_runtime(clang_rt.crtend + OBJECT + ARCHS ${arch} + SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/crtend.c + CFLAGS ${CRT_CFLAGS} + PARENT_TARGET crt) + endforeach() + + add_dependencies(compiler-rt crt) +endif() diff --git 
a/wasmrt/llvm-builtins/builtins/Darwin-excludes/CMakeLists.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/CMakeLists.txt new file mode 100644 index 0000000..266e422 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/CMakeLists.txt @@ -0,0 +1,4 @@ +file(GLOB filter_files ${CMAKE_CURRENT_SOURCE_DIR}/*.txt) +foreach(filter_file ${filter_files}) + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${filter_file}) +endforeach() diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/README.TXT b/wasmrt/llvm-builtins/builtins/Darwin-excludes/README.TXT new file mode 100644 index 0000000..173eccc --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/README.TXT @@ -0,0 +1,11 @@ +This folder contains lists of symbols that should be excluded from the builtin +libraries for Darwin. There are two reasons symbols are excluded: + +(1) They aren't supported on Darwin +(2) They are contained within the OS on the minimum supported target + +The builtin libraries must contain all symbols not provided by the lowest +supported target OS. Meaning if minimum deployment target is iOS 6, all builtins +not included in the ios6-*.txt files need to be included. The one catch is +that this is per-architecture. Since iOS 6 doesn't support arm64, when supporting +iOS 6, the minimum deployment target for arm64 binaries is iOS 7.
diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios-armv7.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios-armv7.txt new file mode 100644 index 0000000..6aa542f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios-armv7.txt @@ -0,0 +1,57 @@ +absvti2 +addtf3 +addvti3 +aeabi_cdcmp +aeabi_cdcmpeq_check_nan +aeabi_cfcmp +aeabi_cfcmpeq_check_nan +aeabi_dcmp +aeabi_div0 +aeabi_drsub +aeabi_fcmp +aeabi_frsub +aeabi_idivmod +aeabi_ldivmod +aeabi_memcmp +aeabi_memcpy +aeabi_memmove +aeabi_memset +aeabi_uidivmod +aeabi_uldivmod +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divtf3 +divti3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +multf3 +multi3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subtf3 +subvti3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios-armv7s.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios-armv7s.txt new file mode 100644 index 0000000..28167aa --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios-armv7s.txt @@ -0,0 +1,57 @@ +absvti2 +addtf3 +addvti3 +aeabi_cdcmp +aeabi_cdcmpeq_check_nan +aeabi_cfcmp +aeabi_cfcmpeq_check_nan +aeabi_dcmp +aeabi_div0 +aeabi_drsub +aeabi_fcmp +aeabi_frsub +aeabi_idivmod +aeabi_ldivmod +aeabi_memcmp +aeabi_memcpy +aeabi_memmove +aeabi_memset +aeabi_uidivmod +aeabi_uldivmod +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divtf3 +divti3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +multf +multi3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subtf3 +subvti3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios.txt new file mode 100644 
index 0000000..5db2400 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios.txt @@ -0,0 +1 @@ +apple_versioning diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios6-armv7.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios6-armv7.txt new file mode 100644 index 0000000..b01fa71 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios6-armv7.txt @@ -0,0 +1,120 @@ +absvdi2 +absvsi2 +adddf3 +adddf3vfp +addsf3 +addsf3vfp +addvdi3 +addvsi3 +ashldi3 +ashrdi3 +bswapdi2 +bswapsi2 +clzdi2 +clzsi2 +cmpdi2 +ctzdi2 +ctzsi2 +divdc3 +divdf3 +divdf3vfp +divdi3 +divmodsi4 +divsc3 +divsf3 +divsf3vfp +divsi3 +eqdf2 +eqdf2vfp +eqsf2 +eqsf2vfp +extendsfdf2 +extendsfdf2vfp +ffsdi2 +fixdfdi +fixdfsi +fixdfsivfp +fixsfdi +fixsfsi +fixsfsivfp +fixunsdfdi +fixunsdfsi +fixunsdfsivfp +fixunssfdi +fixunssfsi +fixunssfsivfp +floatdidf +floatdisf +floatsidf +floatsidfvfp +floatsisf +floatsisfvfp +floatundidf +floatundisf +floatunsidf +floatunsisf +floatunssidfvfp +floatunssisfvfp +gcc_personality_sj0 +gedf2 +gedf2vfp +gesf2 +gesf2vfp +gtdf2 +gtdf2vfp +gtsf2 +gtsf2vfp +ledf2 +ledf2vfp +lesf2 +lesf2vfp +lshrdi3 +ltdf2 +ltdf2vfp +ltsf2 +ltsf2vfp +moddi3 +modsi3 +muldc3 +muldf3 +muldf3vfp +muldi3 +mulodi4 +mulosi4 +mulsc3 +mulsf3 +mulsf3vfp +mulvdi3 +mulvsi3 +nedf2 +nedf2vfp +negdi2 +negvdi2 +negvsi2 +nesf2 +nesf2vfp +paritydi2 +paritysi2 +popcountdi2 +popcountsi2 +powidf2 +powisf2 +subdf3 +subdf3vfp +subsf3 +subsf3vfp +subvdi3 +subvsi3 +truncdfsf2 +truncdfsf2vfp +ucmpdi2 +udivdi3 +udivmoddi4 +udivmodsi4 +udivsi3 +umoddi3 +umodsi3 +unorddf2 +unorddf2vfp +unordsf2 +unordsf2vfp diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios6-armv7s.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios6-armv7s.txt new file mode 100644 index 0000000..b01fa71 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios6-armv7s.txt @@ -0,0 +1,120 @@ +absvdi2 +absvsi2 +adddf3 +adddf3vfp +addsf3 +addsf3vfp +addvdi3 +addvsi3 +ashldi3 +ashrdi3 +bswapdi2 
+bswapsi2 +clzdi2 +clzsi2 +cmpdi2 +ctzdi2 +ctzsi2 +divdc3 +divdf3 +divdf3vfp +divdi3 +divmodsi4 +divsc3 +divsf3 +divsf3vfp +divsi3 +eqdf2 +eqdf2vfp +eqsf2 +eqsf2vfp +extendsfdf2 +extendsfdf2vfp +ffsdi2 +fixdfdi +fixdfsi +fixdfsivfp +fixsfdi +fixsfsi +fixsfsivfp +fixunsdfdi +fixunsdfsi +fixunsdfsivfp +fixunssfdi +fixunssfsi +fixunssfsivfp +floatdidf +floatdisf +floatsidf +floatsidfvfp +floatsisf +floatsisfvfp +floatundidf +floatundisf +floatunsidf +floatunsisf +floatunssidfvfp +floatunssisfvfp +gcc_personality_sj0 +gedf2 +gedf2vfp +gesf2 +gesf2vfp +gtdf2 +gtdf2vfp +gtsf2 +gtsf2vfp +ledf2 +ledf2vfp +lesf2 +lesf2vfp +lshrdi3 +ltdf2 +ltdf2vfp +ltsf2 +ltsf2vfp +moddi3 +modsi3 +muldc3 +muldf3 +muldf3vfp +muldi3 +mulodi4 +mulosi4 +mulsc3 +mulsf3 +mulsf3vfp +mulvdi3 +mulvsi3 +nedf2 +nedf2vfp +negdi2 +negvdi2 +negvsi2 +nesf2 +nesf2vfp +paritydi2 +paritysi2 +popcountdi2 +popcountsi2 +powidf2 +powisf2 +subdf3 +subdf3vfp +subsf3 +subsf3vfp +subvdi3 +subvsi3 +truncdfsf2 +truncdfsf2vfp +ucmpdi2 +udivdi3 +udivmoddi4 +udivmodsi4 +udivsi3 +umoddi3 +umodsi3 +unorddf2 +unorddf2vfp +unordsf2 +unordsf2vfp diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios7-arm64.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios7-arm64.txt new file mode 100644 index 0000000..5e4caf9 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/ios7-arm64.txt @@ -0,0 +1,16 @@ +clzti2 +divti3 +fixdfti +fixsfti +fixunsdfti +floattidf +floattisf +floatuntidf +floatuntisf +gcc_personality_v0 +modti3 +powidf2 +powisf2 +udivmodti4 +udivti3 +umodti3 diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim-i386.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim-i386.txt new file mode 100644 index 0000000..60c0e2d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim-i386.txt @@ -0,0 +1,82 @@ +absvti2 +addtf3 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +divtf3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf 
+floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +multf3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subvti3 +subtf3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 +absvti2 +addtf3 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +divtf3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +multf3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +powitf2 +subvti3 +subtf3 +trampoline_setup +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim-x86_64.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim-x86_64.txt new file mode 100644 index 0000000..de1574e --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim-x86_64.txt @@ -0,0 +1,12 @@ +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim.txt new file mode 100644 index 0000000..5db2400 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/iossim.txt @@ -0,0 +1 @@ +apple_versioning diff --git a/wasmrt/llvm-builtins/builtins/Darwin-excludes/osx-i386.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/osx-i386.txt new file mode 100644 index 0000000..f2ee7fe --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/osx-i386.txt @@ -0,0 +1,35 @@ +absvti2 +addvti3 +ashlti3 +ashrti3 +clzti2 +cmpti2 +ctzti2 +divti3 +ffsti2 +fixdfti +fixsfti +fixunsdfti +fixunssfti +fixunsxfti +fixxfti +floattidf +floattisf +floattixf +floatuntidf +floatuntisf +floatuntixf +lshrti3 +modti3 +muloti4 +multi3 +mulvti3 +negti2 +negvti2 +parityti2 +popcountti2 +subvti3 +ucmpti2 +udivmodti4 +udivti3 +umodti3 diff --git 
a/wasmrt/llvm-builtins/builtins/Darwin-excludes/osx.txt b/wasmrt/llvm-builtins/builtins/Darwin-excludes/osx.txt new file mode 100644 index 0000000..6f9d0a7 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/Darwin-excludes/osx.txt @@ -0,0 +1,7 @@ +apple_versioning +addtf3 +divtf3 +multf3 +powitf2 +subtf3 +trampoline_setup diff --git a/wasmrt/llvm-builtins/builtins/README.txt b/wasmrt/llvm-builtins/builtins/README.txt new file mode 100644 index 0000000..5637183 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/README.txt @@ -0,0 +1,353 @@ +Compiler-RT +================================ + +This directory and its subdirectories contain source code for the compiler +support routines. + +Compiler-RT is open source software. You may freely distribute it under the +terms of the license agreement found in LICENSE.txt. + +================================ + +This is a replacement library for libgcc. Each function is contained +in its own file. Each function has a corresponding unit test under +test/Unit. + +A rudimentary script to test each file is in the file called +test/Unit/test. + +Here is the specification for this library: + +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc + +Please note that the libgcc specification explicitly mentions actual types of +arguments and returned values being expressed with machine modes. +In some cases particular types such as "int", "unsigned", "long long", etc. +may be specified just as examples there. 
+ +Here is a synopsis of the contents of this library: + +typedef int32_t si_int; +typedef uint32_t su_int; + +typedef int64_t di_int; +typedef uint64_t du_int; + +// Integral bit manipulation + +di_int __ashldi3(di_int a, int b); // a << b +ti_int __ashlti3(ti_int a, int b); // a << b + +di_int __ashrdi3(di_int a, int b); // a >> b arithmetic (sign fill) +ti_int __ashrti3(ti_int a, int b); // a >> b arithmetic (sign fill) +di_int __lshrdi3(di_int a, int b); // a >> b logical (zero fill) +ti_int __lshrti3(ti_int a, int b); // a >> b logical (zero fill) + +int __clzsi2(si_int a); // count leading zeros +int __clzdi2(di_int a); // count leading zeros +int __clzti2(ti_int a); // count leading zeros +int __ctzsi2(si_int a); // count trailing zeros +int __ctzdi2(di_int a); // count trailing zeros +int __ctzti2(ti_int a); // count trailing zeros + +int __ffssi2(si_int a); // find least significant 1 bit +int __ffsdi2(di_int a); // find least significant 1 bit +int __ffsti2(ti_int a); // find least significant 1 bit + +int __paritysi2(si_int a); // bit parity +int __paritydi2(di_int a); // bit parity +int __parityti2(ti_int a); // bit parity + +int __popcountsi2(si_int a); // bit population +int __popcountdi2(di_int a); // bit population +int __popcountti2(ti_int a); // bit population + +uint32_t __bswapsi2(uint32_t a); // a byteswapped +uint64_t __bswapdi2(uint64_t a); // a byteswapped + +// Integral arithmetic + +di_int __negdi2 (di_int a); // -a +ti_int __negti2 (ti_int a); // -a +di_int __muldi3 (di_int a, di_int b); // a * b +ti_int __multi3 (ti_int a, ti_int b); // a * b +si_int __divsi3 (si_int a, si_int b); // a / b signed +di_int __divdi3 (di_int a, di_int b); // a / b signed +ti_int __divti3 (ti_int a, ti_int b); // a / b signed +su_int __udivsi3 (su_int n, su_int d); // a / b unsigned +du_int __udivdi3 (du_int a, du_int b); // a / b unsigned +tu_int __udivti3 (tu_int a, tu_int b); // a / b unsigned +si_int __modsi3 (si_int a, si_int b); // a % b signed +di_int 
__moddi3 (di_int a, di_int b); // a % b signed +ti_int __modti3 (ti_int a, ti_int b); // a % b signed +su_int __umodsi3 (su_int a, su_int b); // a % b unsigned +du_int __umoddi3 (du_int a, du_int b); // a % b unsigned +tu_int __umodti3 (tu_int a, tu_int b); // a % b unsigned +du_int __udivmoddi4(du_int a, du_int b, du_int* rem); // a / b, *rem = a % b unsigned +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); // a / b, *rem = a % b unsigned +su_int __udivmodsi4(su_int a, su_int b, su_int* rem); // a / b, *rem = a % b unsigned +si_int __divmodsi4(si_int a, si_int b, si_int* rem); // a / b, *rem = a % b signed +di_int __divmoddi4(di_int a, di_int b, di_int* rem); // a / b, *rem = a % b signed +ti_int __divmodti4(ti_int a, ti_int b, ti_int* rem); // a / b, *rem = a % b signed + + + +// Integral arithmetic with trapping overflow + +si_int __absvsi2(si_int a); // abs(a) +di_int __absvdi2(di_int a); // abs(a) +ti_int __absvti2(ti_int a); // abs(a) + +si_int __negvsi2(si_int a); // -a +di_int __negvdi2(di_int a); // -a +ti_int __negvti2(ti_int a); // -a + +si_int __addvsi3(si_int a, si_int b); // a + b +di_int __addvdi3(di_int a, di_int b); // a + b +ti_int __addvti3(ti_int a, ti_int b); // a + b + +si_int __subvsi3(si_int a, si_int b); // a - b +di_int __subvdi3(di_int a, di_int b); // a - b +ti_int __subvti3(ti_int a, ti_int b); // a - b + +si_int __mulvsi3(si_int a, si_int b); // a * b +di_int __mulvdi3(di_int a, di_int b); // a * b +ti_int __mulvti3(ti_int a, ti_int b); // a * b + + +// Integral arithmetic which returns if overflow + +si_int __mulosi4(si_int a, si_int b, int* overflow); // a * b, overflow set to one if result not in signed range +di_int __mulodi4(di_int a, di_int b, int* overflow); // a * b, overflow set to one if result not in signed range +ti_int __muloti4(ti_int a, ti_int b, int* overflow); // a * b, overflow set to + one if result not in signed range + + +// Integral comparison: a < b -> 0 +// a == b -> 1 +// a > b -> 2 + +si_int __cmpdi2 
(di_int a, di_int b); +si_int __cmpti2 (ti_int a, ti_int b); +si_int __ucmpdi2(du_int a, du_int b); +si_int __ucmpti2(tu_int a, tu_int b); + +// Integral / floating point conversion + +di_int __fixsfdi( float a); +di_int __fixdfdi( double a); +di_int __fixxfdi(long double a); + +ti_int __fixsfti( float a); +ti_int __fixdfti( double a); +ti_int __fixxfti(long double a); +uint64_t __fixtfdi(long double input); // ppc only, doesn't match documentation + +su_int __fixunssfsi( float a); +su_int __fixunsdfsi( double a); +su_int __fixunsxfsi(long double a); + +du_int __fixunssfdi( float a); +du_int __fixunsdfdi( double a); +du_int __fixunsxfdi(long double a); + +tu_int __fixunssfti( float a); +tu_int __fixunsdfti( double a); +tu_int __fixunsxfti(long double a); +uint64_t __fixunstfdi(long double input); // ppc only + +float __floatdisf(di_int a); +double __floatdidf(di_int a); +long double __floatdixf(di_int a); +long double __floatditf(int64_t a); // ppc only + +float __floattisf(ti_int a); +double __floattidf(ti_int a); +long double __floattixf(ti_int a); + +float __floatundisf(du_int a); +double __floatundidf(du_int a); +long double __floatundixf(du_int a); +long double __floatunditf(uint64_t a); // ppc only + +float __floatuntisf(tu_int a); +double __floatuntidf(tu_int a); +long double __floatuntixf(tu_int a); + +// Floating point raised to integer power + +float __powisf2( float a, int b); // a ^ b +double __powidf2( double a, int b); // a ^ b +long double __powixf2(long double a, int b); // a ^ b +long double __powitf2(long double a, int b); // ppc only, a ^ b + +// Complex arithmetic + +// (a + ib) * (c + id) + + float _Complex __mulsc3( float a, float b, float c, float d); + double _Complex __muldc3(double a, double b, double c, double d); +long double _Complex __mulxc3(long double a, long double b, + long double c, long double d); +long double _Complex __multc3(long double a, long double b, + long double c, long double d); // ppc only + +// (a + ib) / (c + id) + 
+ float _Complex __divsc3( float a, float b, float c, float d); + double _Complex __divdc3(double a, double b, double c, double d); +long double _Complex __divxc3(long double a, long double b, + long double c, long double d); +long double _Complex __divtc3(long double a, long double b, + long double c, long double d); // ppc only + + +// Runtime support + +// __clear_cache() is used to tell process that new instructions have been +// written to an address range. Necessary on processors that do not have +// a unified instruction and data cache. +void __clear_cache(void* start, void* end); + +// __enable_execute_stack() is used with nested functions when a trampoline +// function is written onto the stack and that page range needs to be made +// executable. +void __enable_execute_stack(void* addr); + +// __gcc_personality_v0() is normally only called by the system unwinder. +// C code (as opposed to C++) normally does not need a personality function +// because there are no catch clauses or destructors to be run. But there +// is a C language extension __attribute__((cleanup(func))) which marks local +// variables as needing the cleanup function "func" to be run when the +// variable goes out of scope. That includes when an exception is thrown, +// so a personality handler is needed. +_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context); + +// for use with some implementations of assert() in <assert.h> +void __eprintf(const char* format, const char* assertion_expression, + const char* line, const char* file); + +// for systems with emulated thread local storage +void* __emutls_get_address(struct __emutls_control*); + + +// Power PC specific functions + +// There is no C interface to the saveFP/restFP functions. They are helper +// functions called by the prolog and epilog of functions that need to save +// a number of non-volatile float point registers.
+saveFP +restFP + +// PowerPC has a standard template for trampoline functions. This function +// generates a custom trampoline function with the specific realFunc +// and localsPtr values. +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr); + +// adds two 128-bit double-double precision values ( x + y ) +long double __gcc_qadd(long double x, long double y); + +// subtracts two 128-bit double-double precision values ( x - y ) +long double __gcc_qsub(long double x, long double y); + +// multiplies two 128-bit double-double precision values ( x * y ) +long double __gcc_qmul(long double x, long double y); + +// divides two 128-bit double-double precision values ( x / y ) +long double __gcc_qdiv(long double a, long double b); + + +// ARM specific functions + +// There is no C interface to the switch* functions. These helper functions +// are only needed by Thumb1 code for efficient switch table generation. +switch16 +switch32 +switch8 +switchu8 + +// There is no C interface to the *_vfp_d8_d15_regs functions. They are +// called in the prolog and epilog of Thumb1 functions. When the C++ ABI uses +// SJLJ for exceptions, each function with a catch clause or destructors needs +// to save and restore all registers in its prolog and epilog. But there is +// no way to access vector and high float registers from thumb1 code, so the +// compiler must add call outs to these helper functions in the prolog and +// epilog. +restore_vfp_d8_d15_regs +save_vfp_d8_d15_regs + + +// Note: long ago ARM processors did not have floating point hardware support. +// Floating point was done in software and floating point parameters were +// passed in integer registers. When hardware support was added for floating +// point, new *vfp functions were added to do the same operations but with +// floating point parameters in floating point registers.
+ +// Undocumented functions + +float __addsf3vfp(float a, float b); // Appears to return a + b +double __adddf3vfp(double a, double b); // Appears to return a + b +float __divsf3vfp(float a, float b); // Appears to return a / b +double __divdf3vfp(double a, double b); // Appears to return a / b +int __eqsf2vfp(float a, float b); // Appears to return one + // iff a == b and neither is NaN. +int __eqdf2vfp(double a, double b); // Appears to return one + // iff a == b and neither is NaN. +double __extendsfdf2vfp(float a); // Appears to convert from + // float to double. +int __fixdfsivfp(double a); // Appears to convert from + // double to int. +int __fixsfsivfp(float a); // Appears to convert from + // float to int. +unsigned int __fixunssfsivfp(float a); // Appears to convert from + // float to unsigned int. +unsigned int __fixunsdfsivfp(double a); // Appears to convert from + // double to unsigned int. +double __floatsidfvfp(int a); // Appears to convert from + // int to double. +float __floatsisfvfp(int a); // Appears to convert from + // int to float. +double __floatunssidfvfp(unsigned int a); // Appears to convert from + // unsigned int to double. +float __floatunssisfvfp(unsigned int a); // Appears to convert from + // unsigned int to float. 
+int __gedf2vfp(double a, double b); // Appears to return __gedf2 + // (a >= b) +int __gesf2vfp(float a, float b); // Appears to return __gesf2 + // (a >= b) +int __gtdf2vfp(double a, double b); // Appears to return __gtdf2 + // (a > b) +int __gtsf2vfp(float a, float b); // Appears to return __gtsf2 + // (a > b) +int __ledf2vfp(double a, double b); // Appears to return __ledf2 + // (a <= b) +int __lesf2vfp(float a, float b); // Appears to return __lesf2 + // (a <= b) +int __ltdf2vfp(double a, double b); // Appears to return __ltdf2 + // (a < b) +int __ltsf2vfp(float a, float b); // Appears to return __ltsf2 + // (a < b) +double __muldf3vfp(double a, double b); // Appears to return a * b +float __mulsf3vfp(float a, float b); // Appears to return a * b +int __nedf2vfp(double a, double b); // Appears to return __nedf2 + // (a != b) +double __negdf2vfp(double a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +float __negsf2vfp(float a); // Appears to return -a +double __subdf3vfp(double a, double b); // Appears to return a - b +float __subsf3vfp(float a, float b); // Appears to return a - b +float __truncdfsf2vfp(double a); // Appears to convert from + // double to float. +int __unorddf2vfp(double a, double b); // Appears to return __unorddf2 +int __unordsf2vfp(float a, float b); // Appears to return __unordsf2 + + +Preconditions are listed for each function at the definition when there are any. +Any preconditions reflect the specification at +http://gcc.gnu.org/onlinedocs/gccint/Libgcc.html#Libgcc. + +Assumptions are listed in "int_lib.h", and in individual files. Where possible +assumptions are checked at compile time. diff --git a/wasmrt/llvm-builtins/builtins/aarch64/chkstk.S b/wasmrt/llvm-builtins/builtins/aarch64/chkstk.S new file mode 100644 index 0000000..01f9036 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/aarch64/chkstk.S @@ -0,0 +1,35 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../assembly.h" + +// __chkstk routine +// This routine is windows specific. +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// This clobbers registers x16 and x17. +// Does not modify any memory or the stack pointer. + +// mov x15, #256 // Number of bytes of stack, in units of 16 byte +// bl __chkstk +// sub sp, sp, x15, lsl #4 + +#ifdef __aarch64__ + +#define PAGE_SIZE 4096 + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__chkstk) + lsl x16, x15, #4 // x16 = x15 * 16: requested size in bytes + mov x17, sp // x17 = probe address, starting at sp +1: + sub x17, x17, #PAGE_SIZE // step the probe address down one page + subs x16, x16, #PAGE_SIZE // one page less left to probe; sets the flags + ldr xzr, [x17] // touch the page; the loaded value is discarded + b.gt 1b // loop while the remaining size is still > 0 + + ret +END_COMPILERRT_FUNCTION(__chkstk) + +#endif // __aarch64__ diff --git a/wasmrt/llvm-builtins/builtins/aarch64/fp_mode.c b/wasmrt/llvm-builtins/builtins/aarch64/fp_mode.c new file mode 100644 index 0000000..03d75cd --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/aarch64/fp_mode.c @@ -0,0 +1,60 @@ +//===----- lib/aarch64/fp_mode.c - Floating-point mode utilities --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <stdint.h> + +#include "../fp_mode.h" + +#define AARCH64_TONEAREST 0x0 +#define AARCH64_UPWARD 0x1 +#define AARCH64_DOWNWARD 0x2 +#define AARCH64_TOWARDZERO 0x3 +#define AARCH64_RMODE_MASK (AARCH64_TONEAREST | AARCH64_UPWARD | \ + AARCH64_DOWNWARD | AARCH64_TOWARDZERO) +#define AARCH64_RMODE_SHIFT 22 + +#define AARCH64_INEXACT 0x10 + +#ifndef __ARM_FP +// For soft float targets, allow changing rounding mode by overriding the weak +// __aarch64_fe_default_rmode symbol.
+CRT_FE_ROUND_MODE __attribute__((weak)) __aarch64_fe_default_rmode = + CRT_FE_TONEAREST; +#endif + +CRT_FE_ROUND_MODE __fe_getround(void) { +#ifdef __ARM_FP + uint64_t fpcr; + __asm__ __volatile__("mrs %0, fpcr" : "=r" (fpcr)); // read the fpcr system register + fpcr = fpcr >> AARCH64_RMODE_SHIFT & AARCH64_RMODE_MASK; // shift first, then mask: keeps only the rounding-mode field + switch (fpcr) { + case AARCH64_UPWARD: + return CRT_FE_UPWARD; + case AARCH64_DOWNWARD: + return CRT_FE_DOWNWARD; + case AARCH64_TOWARDZERO: + return CRT_FE_TOWARDZERO; + case AARCH64_TONEAREST: + default: + return CRT_FE_TONEAREST; + } +#else + return __aarch64_fe_default_rmode; // no __ARM_FP: report the overridable weak default +#endif +} + +int __fe_raise_inexact(void) { +#ifdef __ARM_FP + uint64_t fpsr; + __asm__ __volatile__("mrs %0, fpsr" : "=r" (fpsr)); // read the fpsr system register + __asm__ __volatile__("msr fpsr, %0" : : "ri" (fpsr | AARCH64_INEXACT)); // write it back with the AARCH64_INEXACT bit set + return 0; +#else + return 0; // no __ARM_FP: nothing is written, the result is still 0 +#endif +} diff --git a/wasmrt/llvm-builtins/builtins/aarch64/lse.S b/wasmrt/llvm-builtins/builtins/aarch64/lse.S new file mode 100644 index 0000000..1fe18f4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/aarch64/lse.S @@ -0,0 +1,270 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "assembly.h" + +// Out-of-line LSE atomics helpers. Ported from libgcc library. +// N = {1, 2, 4, 8} +// M = {1, 2, 4, 8, 16} +// ORDER = {'relax', 'acq', 'rel', 'acq_rel', 'sync'} +// Routines implemented: +// +// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) +// iN __aarch64_swpN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldaddN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldclrN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldeorN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldsetN_ORDER(iN val, iN *ptr) +// +// Routines may modify temporary registers tmp0, tmp1, tmp2, +// return value x0 and the flags only.
+ +#ifdef __aarch64__ + +#ifdef HAS_ASM_LSE +.arch armv8-a+lse +#else +.arch armv8-a +#endif + +#if !defined(__APPLE__) +HIDDEN(__aarch64_have_lse_atomics) +#else +HIDDEN(___aarch64_have_lse_atomics) +#endif + +// Generate mnemonics for +// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4,5 +// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4,5 + +#if SIZE == 1 +#define S b +#define UXT uxtb +#define B 0x00000000 +#elif SIZE == 2 +#define S h +#define UXT uxth +#define B 0x40000000 +#elif SIZE == 4 || SIZE == 8 || SIZE == 16 +#define S +#define UXT mov +#if SIZE == 4 +#define B 0x80000000 +#elif SIZE == 8 +#define B 0xc0000000 +#endif +#else +#error +#endif // SIZE + +#if MODEL == 1 +#define SUFF _relax +#define A +#define L +#define M 0x000000 +#define N 0x000000 +#define BARRIER +#elif MODEL == 2 +#define SUFF _acq +#define A a +#define L +#define M 0x400000 +#define N 0x800000 +#define BARRIER +#elif MODEL == 3 +#define SUFF _rel +#define A +#define L l +#define M 0x008000 +#define N 0x400000 +#define BARRIER +#elif MODEL == 4 +#define SUFF _acq_rel +#define A a +#define L l +#define M 0x408000 +#define N 0xc00000 +#define BARRIER +#elif MODEL == 5 +#define SUFF _sync +#ifdef L_swp +// swp has _acq semantics. +#define A a +#define L +#define M 0x400000 +#define N 0x800000 +#else +// All other _sync functions have _seq semantics. +#define A a +#define L l +#define M 0x408000 +#define N 0xc00000 +#endif +#define BARRIER dmb ish +#else +#error +#endif // MODEL + +// Define register size: s(N) is w(N) when SIZE < 8, otherwise x(N). +#define x(N) GLUE2(x, N) +#define w(N) GLUE2(w, N) +#if SIZE < 8 +#define s(N) w(N) +#else +#define s(N) x(N) +#endif + +#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF) +#if MODEL == 5 +// Drop A for _sync functions. +#define LDXR GLUE3(ld, xr, S) +#else +#define LDXR GLUE4(ld, A, xr, S) +#endif +#define STXR GLUE4(st, L, xr, S) + +// Define temporary registers.
+#define tmp0 16 +#define tmp1 17 +#define tmp2 15 + +// Macro for branch to label if no LSE available (have_lse_atomics byte is 0) +.macro JUMP_IF_NOT_LSE label +#if !defined(__APPLE__) + adrp x(tmp0), __aarch64_have_lse_atomics + ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics] +#else + adrp x(tmp0), ___aarch64_have_lse_atomics@page + ldrb w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff] +#endif + cbz w(tmp0), \label +.endm + +#ifdef L_cas +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas)) + JUMP_IF_NOT_LSE 8f +#if SIZE < 16 +#ifdef HAS_ASM_LSE +#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2] +#else +#define CAS .inst 0x08a07c41 + B + M +#endif + CAS // s(0), s(1), [x2] + ret +8: // no-LSE fallback, reached via JUMP_IF_NOT_LSE + UXT s(tmp0), s(0) +0: + LDXR s(0), [x2] + cmp s(0), s(tmp0) + bne 1f + STXR w(tmp1), s(1), [x2] + cbnz w(tmp1), 0b // retry from 0: while w(tmp1) is non-zero +1: + BARRIER + ret +#else +#if MODEL == 5 +// Drop A for _sync functions. +#define LDXP GLUE2(ld, xp) +#else +#define LDXP GLUE3(ld, A, xp) +#endif +#define STXP GLUE3(st, L, xp) +#ifdef HAS_ASM_LSE +#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4] +#else +#define CASP .inst 0x48207c82 + M +#endif + + CASP // x0, x1, x2, x3, [x4] + ret +8: // no-LSE fallback, reached via JUMP_IF_NOT_LSE + mov x(tmp0), x0 + mov x(tmp1), x1 +0: + LDXP x0, x1, [x4] + cmp x0, x(tmp0) + ccmp x1, x(tmp1), #0, eq + bne 1f + STXP w(tmp2), x2, x3, [x4] + cbnz w(tmp2), 0b // retry from 0: while w(tmp2) is non-zero +1: + BARRIER + ret +#endif +END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas)) +#endif // L_cas + +#ifdef L_swp +#ifdef HAS_ASM_LSE +#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1] +#else +#define SWP .inst 0x38208020 + B + N +#endif +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp)) + JUMP_IF_NOT_LSE 8f + SWP // s(0), s(0), [x1] + ret +8: // no-LSE fallback, reached via JUMP_IF_NOT_LSE + mov s(tmp0), s(0) +0: + LDXR s(0), [x1] + STXR w(tmp1), s(tmp0), [x1] + cbnz w(tmp1), 0b // retry from 0: while w(tmp1) is non-zero + BARRIER + ret +END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp)) +#endif // L_swp + +#if defined(L_ldadd) || defined(L_ldclr) || \ + defined(L_ldeor) || defined(L_ldset) + +#ifdef L_ldadd +#define LDNM ldadd +#define OP add +#define OPN 0x0000 +#elif
defined(L_ldclr) +#define LDNM ldclr +#define OP bic +#define OPN 0x1000 +#elif defined(L_ldeor) +#define LDNM ldeor +#define OP eor +#define OPN 0x2000 +#elif defined(L_ldset) +#define LDNM ldset +#define OP orr +#define OPN 0x3000 +#else +#error +#endif + +#ifdef HAS_ASM_LSE +#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1] +#else +#define LDOP .inst 0x38200020 + OPN + B + N +#endif + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM)) + JUMP_IF_NOT_LSE 8f + LDOP // s(0), s(0), [x1] + ret +8: // no-LSE fallback, reached via JUMP_IF_NOT_LSE + mov s(tmp0), s(0) +0: + LDXR s(0), [x1] + OP s(tmp1), s(0), s(tmp0) + STXR w(tmp2), s(tmp1), [x1] + cbnz w(tmp2), 0b // retry from 0: while w(tmp2) is non-zero + BARRIER + ret +END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM)) +#endif // L_ldadd L_ldclr L_ldeor L_ldset + +NO_EXEC_STACK_DIRECTIVE + +// GNU property note for BTI and PAC +GNU_PROPERTY_BTI_PAC + +#endif // __aarch64__ diff --git a/wasmrt/llvm-builtins/builtins/absvdi2.c b/wasmrt/llvm-builtins/builtins/absvdi2.c new file mode 100644 index 0000000..291ab5f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/absvdi2.c @@ -0,0 +1,25 @@ +//===-- absvdi2.c - Implement __absvdi2 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __absvdi2 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: absolute value + +// Effects: aborts if a is the most negative di_int, whose abs() overflows + +COMPILER_RT_ABI di_int __absvdi2(di_int a) { + const int N = (int)(sizeof(di_int) * CHAR_BIT); // bit width of di_int + if (a == ((di_int)((du_int)1 << (N - 1)))) // only the top bit set: the value with no positive counterpart + compilerrt_abort(); + const di_int t = a >> (N - 1); // 0 for a >= 0; all ones for a < 0 (assumes >> sign-fills) + return (a ^ t) - t; // a if t == 0; ~a + 1 == -a if t is all ones +} diff --git a/wasmrt/llvm-builtins/builtins/absvsi2.c b/wasmrt/llvm-builtins/builtins/absvsi2.c new file mode 100644 index 0000000..9977c33 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/absvsi2.c @@ -0,0 +1,25 @@ +//===-- absvsi2.c - Implement __absvsi2 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __absvsi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: absolute value + +// Effects: aborts if a is the most negative si_int, whose abs() overflows + +COMPILER_RT_ABI si_int __absvsi2(si_int a) { + const int N = (int)(sizeof(si_int) * CHAR_BIT); // bit width of si_int + if (a == ((si_int)((su_int)1 << (N - 1)))) // only the top bit set: the value with no positive counterpart + compilerrt_abort(); + const si_int t = a >> (N - 1); // 0 for a >= 0; all ones for a < 0 (assumes >> sign-fills) + return (a ^ t) - t; // a if t == 0; ~a + 1 == -a if t is all ones +} diff --git a/wasmrt/llvm-builtins/builtins/absvti2.c b/wasmrt/llvm-builtins/builtins/absvti2.c new file mode 100644 index 0000000..491d99d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/absvti2.c @@ -0,0 +1,29 @@ +//===-- absvti2.c - Implement __absvti2 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __absvti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: absolute value + +// Effects: aborts if a is the most negative ti_int, whose abs() overflows + +COMPILER_RT_ABI ti_int __absvti2(ti_int a) { + const int N = (int)(sizeof(ti_int) * CHAR_BIT); // bit width of ti_int + if (a == ((ti_int)((tu_int)1 << (N - 1)))) // shift as unsigned, then cast: 1 << (N - 1) does not fit a signed ti_int + compilerrt_abort(); + const ti_int s = a >> (N - 1); // 0 for a >= 0; all ones for a < 0 (assumes >> sign-fills) + return (a ^ s) - s; // a if s == 0; ~a + 1 == -a if s is all ones +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/adddf3.c b/wasmrt/llvm-builtins/builtins/adddf3.c new file mode 100644 index 0000000..26f11bf --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/adddf3.c @@ -0,0 +1,24 @@ +//===-- lib/adddf3.c - Double-precision addition ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements double-precision soft-float addition.
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION // set before the include below; presumably selects the double variant of __addXf3__ (TODO confirm in fp_lib.h) +#include "fp_add_impl.inc" + +COMPILER_RT_ABI double __adddf3(double a, double b) { return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_dadd(double a, double b) { return __adddf3(a, b); } +#else +COMPILER_RT_ALIAS(__adddf3, __aeabi_dadd) +#endif // COMPILER_RT_ARMHF_TARGET +#endif // __ARM_EABI__ diff --git a/wasmrt/llvm-builtins/builtins/addsf3.c b/wasmrt/llvm-builtins/builtins/addsf3.c new file mode 100644 index 0000000..9f1d517 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/addsf3.c @@ -0,0 +1,24 @@ +//===-- lib/addsf3.c - Single-precision addition ------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements single-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION // set before the include below; presumably selects the float variant of __addXf3__ (TODO confirm in fp_lib.h) +#include "fp_add_impl.inc" + +COMPILER_RT_ABI float __addsf3(float a, float b) { return __addXf3__(a, b); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_fadd(float a, float b) { return __addsf3(a, b); } +#else +COMPILER_RT_ALIAS(__addsf3, __aeabi_fadd) +#endif // COMPILER_RT_ARMHF_TARGET +#endif // __ARM_EABI__ diff --git a/wasmrt/llvm-builtins/builtins/addtf3.c b/wasmrt/llvm-builtins/builtins/addtf3.c new file mode 100644 index 0000000..2cb3a4d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/addtf3.c @@ -0,0 +1,23 @@ +//===-- lib/addtf3.c - Quad-precision addition --------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements quad-precision soft-float addition. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION // set before fp_lib.h is included; presumably selects the quad fp_t (TODO confirm in fp_lib.h) +#include "fp_lib.h" + +#if defined(CRT_HAS_TF_MODE) +#include "fp_add_impl.inc" + +COMPILER_RT_ABI fp_t __addtf3(fp_t a, fp_t b) { + return __addXf3__(a, b); +} + +#endif // CRT_HAS_TF_MODE diff --git a/wasmrt/llvm-builtins/builtins/addvdi3.c b/wasmrt/llvm-builtins/builtins/addvdi3.c new file mode 100644 index 0000000..28661fd --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/addvdi3.c @@ -0,0 +1,29 @@ +//===-- addvdi3.c - Implement __addvdi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __addvdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a + b + +// Effects: aborts if a + b overflows + +COMPILER_RT_ABI di_int __addvdi3(di_int a, di_int b) { + di_int s = (du_int)a + (du_int)b; // add as unsigned so the sum wraps instead of overflowing a signed type + if (b >= 0) { + if (s < a) // adding a non-negative b must not make the sum smaller than a + compilerrt_abort(); + } else { + if (s >= a) // adding a negative b must make the sum smaller than a + compilerrt_abort(); + } + return s; +} diff --git a/wasmrt/llvm-builtins/builtins/addvsi3.c b/wasmrt/llvm-builtins/builtins/addvsi3.c new file mode 100644 index 0000000..4040023 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/addvsi3.c @@ -0,0 +1,29 @@ +//===-- addvsi3.c - Implement __addvsi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __addvsi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a + b + +// Effects: aborts if a + b overflows + +COMPILER_RT_ABI si_int __addvsi3(si_int a, si_int b) { + si_int s = (su_int)a + (su_int)b; // add as unsigned so the sum wraps instead of overflowing a signed type + if (b >= 0) { + if (s < a) // adding a non-negative b must not make the sum smaller than a + compilerrt_abort(); + } else { + if (s >= a) // adding a negative b must make the sum smaller than a + compilerrt_abort(); + } + return s; +} diff --git a/wasmrt/llvm-builtins/builtins/addvti3.c b/wasmrt/llvm-builtins/builtins/addvti3.c new file mode 100644 index 0000000..aa70987 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/addvti3.c @@ -0,0 +1,33 @@ +//===-- addvti3.c - Implement __addvti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __addvti3 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: a + b + +// Effects: aborts if a + b overflows + +COMPILER_RT_ABI ti_int __addvti3(ti_int a, ti_int b) { + ti_int s = (tu_int)a + (tu_int)b; + if (b >= 0) { + if (s < a) + compilerrt_abort(); + } else { + if (s >= a) + compilerrt_abort(); + } + return s; +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/apple_versioning.c b/wasmrt/llvm-builtins/builtins/apple_versioning.c new file mode 100644 index 0000000..83d4194 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/apple_versioning.c @@ -0,0 +1,339 @@ +//===-- apple_versioning.c - Adds versioning symbols for ld ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __APPLE__ +#include + +#if __IPHONE_OS_VERSION_MIN_REQUIRED +#define NOT_HERE_BEFORE_10_6(sym) +#define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp61 __asm("$ld$hide$os6.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp61 = 0; \ + extern const char sym##_tmp60 __asm("$ld$hide$os6.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp60 = 0; \ + extern const char sym##_tmp51 __asm("$ld$hide$os5.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp51 = 0; \ + extern const char sym##_tmp50 __asm("$ld$hide$os5.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp50 = 0; +#else +#define NOT_HERE_BEFORE_10_6(sym) \ + extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym); \ + 
__attribute__((visibility("default"))) const char sym##_tmp5 = 0; +#define NOT_HERE_IN_10_8_AND_EARLIER(sym) \ + extern const char sym##_tmp8 __asm("$ld$hide$os10.8$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp8 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os10.7$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os10.6$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; +#endif + +// Symbols in libSystem.dylib in 10.6 and later, +// but are in libgcc_s.dylib in earlier versions + +NOT_HERE_BEFORE_10_6(__absvdi2) +NOT_HERE_BEFORE_10_6(__absvsi2) +NOT_HERE_BEFORE_10_6(__absvti2) +NOT_HERE_BEFORE_10_6(__addvdi3) +NOT_HERE_BEFORE_10_6(__addvsi3) +NOT_HERE_BEFORE_10_6(__addvti3) +NOT_HERE_BEFORE_10_6(__ashldi3) +NOT_HERE_BEFORE_10_6(__ashlti3) +NOT_HERE_BEFORE_10_6(__ashrdi3) +NOT_HERE_BEFORE_10_6(__ashrti3) +NOT_HERE_BEFORE_10_6(__clear_cache) +NOT_HERE_BEFORE_10_6(__clzdi2) +NOT_HERE_BEFORE_10_6(__clzsi2) +NOT_HERE_BEFORE_10_6(__clzti2) +NOT_HERE_BEFORE_10_6(__cmpdi2) +NOT_HERE_BEFORE_10_6(__cmpti2) +NOT_HERE_BEFORE_10_6(__ctzdi2) +NOT_HERE_BEFORE_10_6(__ctzsi2) +NOT_HERE_BEFORE_10_6(__ctzti2) +NOT_HERE_BEFORE_10_6(__divdc3) +NOT_HERE_BEFORE_10_6(__divdi3) +NOT_HERE_BEFORE_10_6(__divsc3) +NOT_HERE_BEFORE_10_6(__divtc3) +NOT_HERE_BEFORE_10_6(__divti3) +NOT_HERE_BEFORE_10_6(__divxc3) +NOT_HERE_BEFORE_10_6(__enable_execute_stack) +NOT_HERE_BEFORE_10_6(__ffsdi2) +NOT_HERE_BEFORE_10_6(__ffsti2) +NOT_HERE_BEFORE_10_6(__fixdfdi) +NOT_HERE_BEFORE_10_6(__fixdfti) +NOT_HERE_BEFORE_10_6(__fixsfdi) +NOT_HERE_BEFORE_10_6(__fixsfti) +NOT_HERE_BEFORE_10_6(__fixtfdi) +NOT_HERE_BEFORE_10_6(__fixunsdfdi) +NOT_HERE_BEFORE_10_6(__fixunsdfsi) +NOT_HERE_BEFORE_10_6(__fixunsdfti) +NOT_HERE_BEFORE_10_6(__fixunssfdi) +NOT_HERE_BEFORE_10_6(__fixunssfsi) +NOT_HERE_BEFORE_10_6(__fixunssfti) +NOT_HERE_BEFORE_10_6(__fixunstfdi) 
+NOT_HERE_BEFORE_10_6(__fixunsxfdi) +NOT_HERE_BEFORE_10_6(__fixunsxfsi) +NOT_HERE_BEFORE_10_6(__fixunsxfti) +NOT_HERE_BEFORE_10_6(__fixxfdi) +NOT_HERE_BEFORE_10_6(__fixxfti) +NOT_HERE_BEFORE_10_6(__floatdidf) +NOT_HERE_BEFORE_10_6(__floatdisf) +NOT_HERE_BEFORE_10_6(__floatditf) +NOT_HERE_BEFORE_10_6(__floatdixf) +NOT_HERE_BEFORE_10_6(__floattidf) +NOT_HERE_BEFORE_10_6(__floattisf) +NOT_HERE_BEFORE_10_6(__floattixf) +NOT_HERE_BEFORE_10_6(__floatundidf) +NOT_HERE_BEFORE_10_6(__floatundisf) +NOT_HERE_BEFORE_10_6(__floatunditf) +NOT_HERE_BEFORE_10_6(__floatundixf) +NOT_HERE_BEFORE_10_6(__floatuntidf) +NOT_HERE_BEFORE_10_6(__floatuntisf) +NOT_HERE_BEFORE_10_6(__floatuntixf) +NOT_HERE_BEFORE_10_6(__gcc_personality_v0) +NOT_HERE_BEFORE_10_6(__lshrdi3) +NOT_HERE_BEFORE_10_6(__lshrti3) +NOT_HERE_BEFORE_10_6(__moddi3) +NOT_HERE_BEFORE_10_6(__modti3) +NOT_HERE_BEFORE_10_6(__muldc3) +NOT_HERE_BEFORE_10_6(__muldi3) +NOT_HERE_BEFORE_10_6(__mulsc3) +NOT_HERE_BEFORE_10_6(__multc3) +NOT_HERE_BEFORE_10_6(__multi3) +NOT_HERE_BEFORE_10_6(__mulvdi3) +NOT_HERE_BEFORE_10_6(__mulvsi3) +NOT_HERE_BEFORE_10_6(__mulvti3) +NOT_HERE_BEFORE_10_6(__mulxc3) +NOT_HERE_BEFORE_10_6(__negdi2) +NOT_HERE_BEFORE_10_6(__negti2) +NOT_HERE_BEFORE_10_6(__negvdi2) +NOT_HERE_BEFORE_10_6(__negvsi2) +NOT_HERE_BEFORE_10_6(__negvti2) +NOT_HERE_BEFORE_10_6(__paritydi2) +NOT_HERE_BEFORE_10_6(__paritysi2) +NOT_HERE_BEFORE_10_6(__parityti2) +NOT_HERE_BEFORE_10_6(__popcountdi2) +NOT_HERE_BEFORE_10_6(__popcountsi2) +NOT_HERE_BEFORE_10_6(__popcountti2) +NOT_HERE_BEFORE_10_6(__powidf2) +NOT_HERE_BEFORE_10_6(__powisf2) +NOT_HERE_BEFORE_10_6(__powitf2) +NOT_HERE_BEFORE_10_6(__powixf2) +NOT_HERE_BEFORE_10_6(__subvdi3) +NOT_HERE_BEFORE_10_6(__subvsi3) +NOT_HERE_BEFORE_10_6(__subvti3) +NOT_HERE_BEFORE_10_6(__ucmpdi2) +NOT_HERE_BEFORE_10_6(__ucmpti2) +NOT_HERE_BEFORE_10_6(__udivdi3) +NOT_HERE_BEFORE_10_6(__udivmoddi4) +NOT_HERE_BEFORE_10_6(__udivmodti4) +NOT_HERE_BEFORE_10_6(__udivti3) +NOT_HERE_BEFORE_10_6(__umoddi3) 
+NOT_HERE_BEFORE_10_6(__umodti3) + +#if __powerpc__ +NOT_HERE_BEFORE_10_6(__gcc_qadd) +NOT_HERE_BEFORE_10_6(__gcc_qdiv) +NOT_HERE_BEFORE_10_6(__gcc_qmul) +NOT_HERE_BEFORE_10_6(__gcc_qsub) +NOT_HERE_BEFORE_10_6(__trampoline_setup) +#endif // __powerpc__ + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_compare_exchange_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_exchange_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_add_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_and_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_or_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_sub_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_fetch_xor_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_2) 
+NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_load_8) + +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_1) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_2) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_4) +NOT_HERE_IN_10_8_AND_EARLIER(__atomic_store_8) + +#if __arm__ && __DYNAMIC__ +#define NOT_HERE_UNTIL_AFTER_4_3(sym) \ + extern const char sym##_tmp1 __asm("$ld$hide$os3.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp1 = 0; \ + extern const char sym##_tmp2 __asm("$ld$hide$os3.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp2 = 0; \ + extern const char sym##_tmp3 __asm("$ld$hide$os3.2$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \ + extern const char sym##_tmp4 __asm("$ld$hide$os4.0$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \ + extern const char sym##_tmp5 __asm("$ld$hide$os4.1$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp5 = 0; \ + extern const char sym##_tmp6 __asm("$ld$hide$os4.2$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp6 = 0; \ + extern const char sym##_tmp7 __asm("$ld$hide$os4.3$_" #sym); \ + __attribute__((visibility("default"))) const char sym##_tmp7 = 0; + +NOT_HERE_UNTIL_AFTER_4_3(__absvdi2) +NOT_HERE_UNTIL_AFTER_4_3(__absvsi2) +NOT_HERE_UNTIL_AFTER_4_3(__adddf3) +NOT_HERE_UNTIL_AFTER_4_3(__adddf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__addsf3) +NOT_HERE_UNTIL_AFTER_4_3(__addsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__addvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__addvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__ashldi3) +NOT_HERE_UNTIL_AFTER_4_3(__ashrdi3) +NOT_HERE_UNTIL_AFTER_4_3(__bswapdi2) +NOT_HERE_UNTIL_AFTER_4_3(__bswapsi2) +NOT_HERE_UNTIL_AFTER_4_3(__clzdi2) +NOT_HERE_UNTIL_AFTER_4_3(__clzsi2) +NOT_HERE_UNTIL_AFTER_4_3(__cmpdi2) +NOT_HERE_UNTIL_AFTER_4_3(__ctzdi2) +NOT_HERE_UNTIL_AFTER_4_3(__ctzsi2) 
+NOT_HERE_UNTIL_AFTER_4_3(__divdc3) +NOT_HERE_UNTIL_AFTER_4_3(__divdf3) +NOT_HERE_UNTIL_AFTER_4_3(__divdf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__divdi3) +NOT_HERE_UNTIL_AFTER_4_3(__divsc3) +NOT_HERE_UNTIL_AFTER_4_3(__divsf3) +NOT_HERE_UNTIL_AFTER_4_3(__divsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__divsi3) +NOT_HERE_UNTIL_AFTER_4_3(__eqdf2) +NOT_HERE_UNTIL_AFTER_4_3(__eqdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__eqsf2) +NOT_HERE_UNTIL_AFTER_4_3(__eqsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2) +NOT_HERE_UNTIL_AFTER_4_3(__extendsfdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ffsdi2) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixdfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixsfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunsdfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfdi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsi) +NOT_HERE_UNTIL_AFTER_4_3(__fixunssfsivfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatdidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatdisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsidfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatsisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatsisfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatundidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatundisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunsidf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunsisf) +NOT_HERE_UNTIL_AFTER_4_3(__floatunssidfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__floatunssisfvfp) +NOT_HERE_UNTIL_AFTER_4_3(__gedf2) +NOT_HERE_UNTIL_AFTER_4_3(__gedf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gesf2) +NOT_HERE_UNTIL_AFTER_4_3(__gesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gtdf2) +NOT_HERE_UNTIL_AFTER_4_3(__gtdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__gtsf2) +NOT_HERE_UNTIL_AFTER_4_3(__gtsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ledf2) +NOT_HERE_UNTIL_AFTER_4_3(__ledf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__lesf2) +NOT_HERE_UNTIL_AFTER_4_3(__lesf2vfp) 
+NOT_HERE_UNTIL_AFTER_4_3(__lshrdi3) +NOT_HERE_UNTIL_AFTER_4_3(__ltdf2) +NOT_HERE_UNTIL_AFTER_4_3(__ltdf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ltsf2) +NOT_HERE_UNTIL_AFTER_4_3(__ltsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__moddi3) +NOT_HERE_UNTIL_AFTER_4_3(__modsi3) +NOT_HERE_UNTIL_AFTER_4_3(__muldc3) +NOT_HERE_UNTIL_AFTER_4_3(__muldf3) +NOT_HERE_UNTIL_AFTER_4_3(__muldf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__muldi3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsc3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsf3) +NOT_HERE_UNTIL_AFTER_4_3(__mulsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__mulvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__mulvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__nedf2) +NOT_HERE_UNTIL_AFTER_4_3(__nedf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__negdi2) +NOT_HERE_UNTIL_AFTER_4_3(__negvdi2) +NOT_HERE_UNTIL_AFTER_4_3(__negvsi2) +NOT_HERE_UNTIL_AFTER_4_3(__nesf2) +NOT_HERE_UNTIL_AFTER_4_3(__nesf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__paritydi2) +NOT_HERE_UNTIL_AFTER_4_3(__paritysi2) +NOT_HERE_UNTIL_AFTER_4_3(__popcountdi2) +NOT_HERE_UNTIL_AFTER_4_3(__popcountsi2) +NOT_HERE_UNTIL_AFTER_4_3(__powidf2) +NOT_HERE_UNTIL_AFTER_4_3(__powisf2) +NOT_HERE_UNTIL_AFTER_4_3(__subdf3) +NOT_HERE_UNTIL_AFTER_4_3(__subdf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__subsf3) +NOT_HERE_UNTIL_AFTER_4_3(__subsf3vfp) +NOT_HERE_UNTIL_AFTER_4_3(__subvdi3) +NOT_HERE_UNTIL_AFTER_4_3(__subvsi3) +NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2) +NOT_HERE_UNTIL_AFTER_4_3(__truncdfsf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__ucmpdi2) +NOT_HERE_UNTIL_AFTER_4_3(__udivdi3) +NOT_HERE_UNTIL_AFTER_4_3(__udivmoddi4) +NOT_HERE_UNTIL_AFTER_4_3(__udivsi3) +NOT_HERE_UNTIL_AFTER_4_3(__umoddi3) +NOT_HERE_UNTIL_AFTER_4_3(__umodsi3) +NOT_HERE_UNTIL_AFTER_4_3(__unorddf2) +NOT_HERE_UNTIL_AFTER_4_3(__unorddf2vfp) +NOT_HERE_UNTIL_AFTER_4_3(__unordsf2) +NOT_HERE_UNTIL_AFTER_4_3(__unordsf2vfp) + +NOT_HERE_UNTIL_AFTER_4_3(__divmodsi4) +NOT_HERE_UNTIL_AFTER_4_3(__udivmodsi4) +#endif // __arm__ && __DYNAMIC__ + +#else // !__APPLE__ + +extern int avoid_empty_file; + +#endif // !__APPLE__ diff --git 
a/wasmrt/llvm-builtins/builtins/arm/adddf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/adddf3vfp.S new file mode 100644 index 0000000..1a271db --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/adddf3vfp.S @@ -0,0 +1,31 @@ +//===-- adddf3vfp.S - Implement adddf3vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// double __adddf3vfp(double a, double b) { return a + b; } +// +// Adds two double precision floating point numbers using the Darwin +// calling convention where double arguments are passed in GPR pairs + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__adddf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vadd.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__adddf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/addsf3.S b/wasmrt/llvm-builtins/builtins/arm/addsf3.S new file mode 100644 index 0000000..aa4d404 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/addsf3.S @@ -0,0 +1,276 @@ +//===-- addsf3.S - Adds two single precision floating point numbers-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __addsf3 (single precision floating pointer number +// addition with the IEEE-754 default rounding (to nearest, ties to even) +// function for the ARM Thumb1 ISA. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" +#define significandBits 23 +#define typeWidth 32 + + .syntax unified + .text + .thumb + .p2align 2 + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3) + +DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3) + push {r4, r5, r6, r7, lr} + // Get the absolute value of a and b. + lsls r2, r0, #1 + lsls r3, r1, #1 + lsrs r2, r2, #1 // aAbs + beq LOCAL_LABEL(a_zero_nan_inf) + lsrs r3, r3, #1 // bAbs + beq LOCAL_LABEL(zero_nan_inf) + + // Detect if a or b is infinity or Nan. + lsrs r6, r2, #(significandBits) + lsrs r7, r3, #(significandBits) + cmp r6, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + cmp r7, #0xFF + beq LOCAL_LABEL(zero_nan_inf) + + // Swap Rep and Abs so that a and aAbs has the larger absolute value. + cmp r2, r3 + bhs LOCAL_LABEL(no_swap) + movs r4, r0 + movs r5, r2 + movs r0, r1 + movs r2, r3 + movs r1, r4 + movs r3, r5 +LOCAL_LABEL(no_swap): + + // Get the significands and shift them to give us round, guard and sticky. + lsls r4, r0, #(typeWidth - significandBits) + lsrs r4, r4, #(typeWidth - significandBits - 3) // aSignificand << 3 + lsls r5, r1, #(typeWidth - significandBits) + lsrs r5, r5, #(typeWidth - significandBits - 3) // bSignificand << 3 + + // Get the implicitBit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3) + + // Get aExponent and set implicit bit if necessary. + lsrs r2, r2, #(significandBits) + beq LOCAL_LABEL(a_done_implicit_bit) + orrs r4, r6 +LOCAL_LABEL(a_done_implicit_bit): + + // Get bExponent and set implicit bit if necessary. 
+ lsrs r3, r3, #(significandBits) + beq LOCAL_LABEL(b_done_implicit_bit) + orrs r5, r6 +LOCAL_LABEL(b_done_implicit_bit): + + // Get the difference in exponents. + subs r6, r2, r3 + beq LOCAL_LABEL(done_align) + + // If b is denormal, then a must be normal as align > 0, and we only need to + // right shift bSignificand by (align - 1) bits. + cmp r3, #0 + bne 1f + subs r6, r6, #1 +1: + + // No longer needs bExponent. r3 is dead here. + // Set sticky bits of b: sticky = bSignificand << (typeWidth - align). + movs r3, #(typeWidth) + subs r3, r3, r6 + movs r7, r5 + lsls r7, r3 + beq 1f + movs r7, #1 +1: + + // bSignificand = bSignificand >> align | sticky; + lsrs r5, r6 + orrs r5, r7 + bne LOCAL_LABEL(done_align) + movs r5, #1 // sticky; b is known to be non-zero. + +LOCAL_LABEL(done_align): + // isSubtraction = (aRep ^ bRep) >> 31; + movs r7, r0 + eors r7, r1 + lsrs r7, #31 + bne LOCAL_LABEL(do_substraction) + + // Same sign, do Addition. + + // aSignificand += bSignificand; + adds r4, r4, r5 + + // Check carry bit. + movs r6, #1 + lsls r6, r6, #(significandBits + 3 + 1) + movs r7, r4 + ands r7, r6 + beq LOCAL_LABEL(form_result) + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. + movs r7, r4 + movs r6, #1 + ands r7, r6 // sticky = aSignificand & 1; + lsrs r4, #1 + orrs r4, r7 // result Significand + adds r2, #1 // result Exponent + // If we have overflowed the type, return +/- infinity. + cmp r2, 0xFF + beq LOCAL_LABEL(ret_inf) + +LOCAL_LABEL(form_result): + // Shift the sign, exponent and significand into place. + lsrs r0, #(typeWidth - 1) + lsls r0, #(typeWidth - 1) // Get Sign. + lsls r2, #(significandBits) + orrs r0, r2 + movs r1, r4 + lsls r4, #(typeWidth - significandBits - 3) + lsrs r4, #(typeWidth - significandBits) + orrs r0, r4 + + // Final rounding. The result may overflow to infinity, but that is the + // correct result in that case. 
+ // roundGuardSticky = aSignificand & 0x7; + movs r2, #0x7 + ands r1, r2 + // if (roundGuardSticky > 0x4) result++; + + cmp r1, #0x4 + blt LOCAL_LABEL(done_round) + beq 1f + adds r0, #1 + pop {r4, r5, r6, r7, pc} +1: + + // if (roundGuardSticky == 0x4) result += result & 1; + movs r1, r0 + lsrs r1, #1 + bcc LOCAL_LABEL(done_round) + adds r0, r0, #1 +LOCAL_LABEL(done_round): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(do_substraction): + subs r4, r4, r5 // aSignificand -= bSignificand; + beq LOCAL_LABEL(ret_zero) + movs r6, r4 + cmp r2, 0 + beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize. + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent: + lsrs r6, r6, #(significandBits + 3) + bne LOCAL_LABEL(form_result) + + push {r0, r1, r2, r3} + movs r0, r4 + bl SYMBOL_NAME(__clzsi2) + movs r5, r0 + pop {r0, r1, r2, r3} + // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + subs r5, r5, #(typeWidth - significandBits - 3 - 1) + // aSignificand <<= shift; aExponent -= shift; + lsls r4, r5 + subs r2, r2, r5 + bgt LOCAL_LABEL(form_result) + + // Do normalization if aExponent <= 0. + movs r6, #1 + subs r6, r6, r2 // 1 - aExponent; + movs r2, #0 // aExponent = 0; + movs r3, #(typeWidth) // bExponent is dead. + subs r3, r3, r6 + movs r7, r4 + lsls r7, r3 // stickyBit = (bool)(aSignificant << (typeWidth - align)) + beq 1f + movs r7, #1 +1: + lsrs r4, r6 // aSignificand >> shift + orrs r4, r7 + b LOCAL_LABEL(form_result) + +LOCAL_LABEL(ret_zero): + movs r0, #0 + pop {r4, r5, r6, r7, pc} + + +LOCAL_LABEL(a_zero_nan_inf): + lsrs r3, r3, #1 + +LOCAL_LABEL(zero_nan_inf): + // Here r2 has aAbs, r3 has bAbs + movs r4, #0xFF + lsls r4, r4, #(significandBits) // Make +inf. + + cmp r2, r4 + bhi LOCAL_LABEL(a_is_nan) + cmp r3, r4 + bhi LOCAL_LABEL(b_is_nan) + + cmp r2, r4 + bne LOCAL_LABEL(a_is_rational) + // aAbs is INF. + eors r1, r0 // aRep ^ bRep. + movs r6, #1 + lsls r6, r6, #(typeWidth - 1) // get sign mask. 
+ cmp r1, r6 // if they only differ on sign bit, it's -INF + INF + beq LOCAL_LABEL(a_is_nan) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(a_is_rational): + cmp r3, r4 + bne LOCAL_LABEL(b_is_rational) + movs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_rational): + // either a or b or both are zero. + adds r4, r2, r3 + beq LOCAL_LABEL(both_zero) + cmp r2, #0 // is absA 0 ? + beq LOCAL_LABEL(ret_b) + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(both_zero): + ands r0, r1 // +0 + -0 = +0 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_b): + movs r0, r1 + +LOCAL_LABEL(ret): + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(b_is_nan): + movs r0, r1 +LOCAL_LABEL(a_is_nan): + movs r1, #1 + lsls r1, r1, #(significandBits -1) // r1 is quiet bit. + orrs r0, r1 + pop {r4, r5, r6, r7, pc} + +LOCAL_LABEL(ret_inf): + movs r4, #0xFF + lsls r4, r4, #(significandBits) + orrs r0, r4 + lsrs r0, r0, #(significandBits) + lsls r0, r0, #(significandBits) + pop {r4, r5, r6, r7, pc} + + +END_COMPILERRT_FUNCTION(__addsf3) + +NO_EXEC_STACK_DIRECTIVE diff --git a/wasmrt/llvm-builtins/builtins/arm/addsf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/addsf3vfp.S new file mode 100644 index 0000000..c9d1fd1 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/addsf3vfp.S @@ -0,0 +1,32 @@ +//===-- addsf3vfp.S - Implement addsf3vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __addsf3vfp(float a, float b); +// +// Adds two single precision floating point numbers using the Darwin +// calling convention where single arguments are passed in GPRs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__addsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vadd.f32 s0, s0, s1 +#else + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vadd.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__addsf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_cdcmp.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_cdcmp.S new file mode 100644 index 0000000..c7abdb0 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_cdcmp.S @@ -0,0 +1,140 @@ +//===-- aeabi_cdcmp.S - EABI cdcmp* implementation ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#define APSR_Z (1 << 30) +#define APSR_C (1 << 29) + +// void __aeabi_cdcmpeq(double a, double b) { +// if (isnan(a) || isnan(b)) { +// Z = 0; C = 1; +// } else { +// __aeabi_cdcmple(a, b); +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) + push {r0-r3, lr} + bl __aeabi_cdcmpeq_check_nan + cmp r0, #1 +#if defined(USE_THUMB_1) + beq 1f + // NaN has been ruled out, so __aeabi_cdcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cdcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + pop {r0-r3, lr} + + // NaN has been ruled out, so __aeabi_cdcmple can't trap + // Use "it ne" + unconditional branch to guarantee a supported relocation if + // __aeabi_cdcmple is in a different section for some builds. + IT(ne) + bne __aeabi_cdcmple + +#if defined(USE_THUMB_2) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else + msr APSR_nzcvq, #APSR_C +#endif + JMP(lr) +#endif +END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) + + +// void __aeabi_cdcmple(double a, double b) { +// if (__aeabi_dcmplt(a, b)) { +// Z = 0; C = 0; +// } else if (__aeabi_dcmpeq(a, b)) { +// Z = 1; C = 1; +// } else { +// Z = 0; C = 1; +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) + // Per the RTABI, this function must preserve r0-r11. 
+ // Save lr in the same instruction for compactness + push {r0-r3, lr} + + bl __aeabi_dcmplt + cmp r0, #1 +#if defined(USE_THUMB_1) + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + ITT(eq) + moveq ip, #0 + beq 1f + + ldm sp, {r0-r3} + bl __aeabi_dcmpeq + cmp r0, #1 + ITE(eq) + moveq ip, #(APSR_C | APSR_Z) + movne ip, #(APSR_C) + +1: + msr APSR_nzcvq, ip + pop {r0-r3} + POP_PC() +#endif +END_COMPILERRT_FUNCTION(__aeabi_cdcmple) + +// int __aeabi_cdrcmple(double a, double b) { +// return __aeabi_cdcmple(b, a); +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cdrcmple) + // Swap r0 and r2 + mov ip, r0 + mov r0, r2 + mov r2, ip + + // Swap r1 and r3 + mov ip, r1 + mov r1, r3 + mov r3, ip + + b __aeabi_cdcmple +END_COMPILERRT_FUNCTION(__aeabi_cdrcmple) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_cdcmpeq_check_nan.c b/wasmrt/llvm-builtins/builtins/arm/aeabi_cdcmpeq_check_nan.c new file mode 100644 index 0000000..7bae874 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_cdcmpeq_check_nan.c @@ -0,0 +1,15 @@ +//===-- lib/arm/aeabi_cdcmpeq_helper.c - Helper for cdcmpeq ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../int_lib.h" +#include <stdint.h> + +AEABI_RTABI __attribute__((visibility("hidden"))) int +__aeabi_cdcmpeq_check_nan(double a, double b) { + return __builtin_isnan(a) || __builtin_isnan(b); +} diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_cfcmp.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_cfcmp.S new file mode 100644 index 0000000..81c4766 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_cfcmp.S @@ -0,0 +1,135 @@ +//===-- aeabi_cfcmp.S - EABI cfcmp* implementation ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#define APSR_Z (1 << 30) +#define APSR_C (1 << 29) + +// void __aeabi_cfcmpeq(float a, float b) { +// if (isnan(a) || isnan(b)) { +// Z = 0; C = 1; +// } else { +// __aeabi_cfcmple(a, b); +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) + push {r0-r3, lr} + bl __aeabi_cfcmpeq_check_nan + cmp r0, #1 +#if defined(USE_THUMB_1) + beq 1f + // NaN has been ruled out, so __aeabi_cfcmple can't trap + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_cfcmple + pop {r0-r3, pc} +1: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + pop {r0-r3, lr} + + // NaN has been ruled out, so __aeabi_cfcmple can't trap + // Use "it ne" + unconditional branch to guarantee a supported relocation if + // __aeabi_cfcmple is in a different section for some builds.
+ IT(ne) + bne __aeabi_cfcmple + +#if defined(USE_THUMB_2) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else + msr APSR_nzcvq, #APSR_C +#endif + JMP(lr) +#endif +END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) + + +// void __aeabi_cfcmple(float a, float b) { +// if (__aeabi_fcmplt(a, b)) { +// Z = 0; C = 0; +// } else if (__aeabi_fcmpeq(a, b)) { +// Z = 1; C = 1; +// } else { +// Z = 0; C = 1; +// } +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) + // Per the RTABI, this function must preserve r0-r11. + // Save lr in the same instruction for compactness + push {r0-r3, lr} + + bl __aeabi_fcmplt + cmp r0, #1 +#if defined(USE_THUMB_1) + bne 1f + // Z = 0, C = 0 + movs r0, #1 + lsls r0, r0, #1 + pop {r0-r3, pc} +1: + mov r0, sp + ldm r0, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + bne 2f + // Z = 1, C = 1 + movs r0, #2 + lsls r0, r0, #31 + pop {r0-r3, pc} +2: + // Z = 0, C = 1 + movs r0, #0xF + lsls r0, r0, #31 + pop {r0-r3, pc} +#else + ITT(eq) + moveq ip, #0 + beq 1f + + ldm sp, {r0-r3} + bl __aeabi_fcmpeq + cmp r0, #1 + ITE(eq) + moveq ip, #(APSR_C | APSR_Z) + movne ip, #(APSR_C) + +1: + msr APSR_nzcvq, ip + pop {r0-r3} + POP_PC() +#endif +END_COMPILERRT_FUNCTION(__aeabi_cfcmple) + +// int __aeabi_cfrcmple(float a, float b) { +// return __aeabi_cfcmple(b, a); +// } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_cfrcmple) + // Swap r0 and r1 + mov ip, r0 + mov r0, r1 + mov r1, ip + + b __aeabi_cfcmple +END_COMPILERRT_FUNCTION(__aeabi_cfrcmple) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_cfcmpeq_check_nan.c b/wasmrt/llvm-builtins/builtins/arm/aeabi_cfcmpeq_check_nan.c new file mode 100644 index 0000000..2540733 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_cfcmpeq_check_nan.c @@ -0,0 +1,15 @@ +//===-- lib/arm/aeabi_cfcmpeq_helper.c - Helper for cdcmpeq ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../int_lib.h" +#include <stdint.h> + +AEABI_RTABI __attribute__((visibility("hidden"))) int +__aeabi_cfcmpeq_check_nan(float a, float b) { + return __builtin_isnan(a) || __builtin_isnan(b); +} diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_dcmp.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_dcmp.S new file mode 100644 index 0000000..5f72067 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_dcmp.S @@ -0,0 +1,51 @@ +//===-- aeabi_dcmp.S - EABI dcmp* implementation ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_dcmp{eq,lt,le,ge,gt}(double a, double b) { +// int result = __{eq,lt,le,ge,gt}df2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS \ + vmov d0, r0, r1 SEPARATOR \ + vmov d1, r2, r3 +#else +# define CONVERT_DCMP_ARGS_TO_DF2_ARGS +#endif + +#define DEFINE_AEABI_DCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) \ + push { r4, lr } SEPARATOR \ + CONVERT_DCMP_ARGS_TO_DF2_ARGS SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## df2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + movs r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + movs r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_dcmp ## cond) + +DEFINE_AEABI_DCMP(eq) +DEFINE_AEABI_DCMP(lt) +DEFINE_AEABI_DCMP(le)
+DEFINE_AEABI_DCMP(ge) +DEFINE_AEABI_DCMP(gt) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_div0.c b/wasmrt/llvm-builtins/builtins/arm/aeabi_div0.c new file mode 100644 index 0000000..7e88623 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_div0.c @@ -0,0 +1,40 @@ +//===-- aeabi_div0.c - ARM Runtime ABI support routines for compiler-rt ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the division by zero helper routines as specified by the +// Run-time ABI for the ARM Architecture. +// +//===----------------------------------------------------------------------===// + +// RTABI 4.3.2 - Division by zero +// +// The *div0 functions: +// - Return the value passed to them as a parameter +// - Or, return a fixed value defined by the execution environment (such as 0) +// - Or, raise a signal (often SIGFPE) or throw an exception, and do not return +// +// An application may provide its own implementations of the *div0 functions to +// force a particular behaviour from the *div and *divmod functions called out +// of line.
+ +#include "../int_lib.h" + +// provide an unused declaration to pacify pedantic compilation +extern unsigned char declaration; + +#if defined(__ARM_EABI__) +AEABI_RTABI int __attribute__((weak)) __attribute__((visibility("hidden"))) +__aeabi_idiv0(int return_value) { + return return_value; +} + +AEABI_RTABI long long __attribute__((weak)) +__attribute__((visibility("hidden"))) __aeabi_ldiv0(long long return_value) { + return return_value; +} +#endif diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_drsub.c b/wasmrt/llvm-builtins/builtins/arm/aeabi_drsub.c new file mode 100644 index 0000000..e4e8dc0 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_drsub.c @@ -0,0 +1,14 @@ +//===-- lib/arm/aeabi_drsub.c - Double-precision subtraction --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "../fp_lib.h" + +AEABI_RTABI fp_t __aeabi_dsub(fp_t, fp_t); + +AEABI_RTABI fp_t __aeabi_drsub(fp_t a, fp_t b) { return __aeabi_dsub(b, a); } diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_fcmp.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_fcmp.S new file mode 100644 index 0000000..cd311b4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_fcmp.S @@ -0,0 +1,51 @@ +//===-- aeabi_fcmp.S - EABI fcmp* implementation ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_fcmp{eq,lt,le,ge,gt}(float a, float b) { +// int result = __{eq,lt,le,ge,gt}sf2(a, b); +// if (result {==,<,<=,>=,>} 0) { +// return 1; +// } else { +// return 0; +// } +// } + +#if defined(COMPILER_RT_ARMHF_TARGET) +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS \ + vmov s0, r0 SEPARATOR \ + vmov s1, r1 +#else +# define CONVERT_FCMP_ARGS_TO_SF2_ARGS +#endif + +#define DEFINE_AEABI_FCMP(cond) \ + .syntax unified SEPARATOR \ + .p2align 2 SEPARATOR \ +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) \ + push { r4, lr } SEPARATOR \ + CONVERT_FCMP_ARGS_TO_SF2_ARGS SEPARATOR \ + bl SYMBOL_NAME(__ ## cond ## sf2) SEPARATOR \ + cmp r0, #0 SEPARATOR \ + b ## cond 1f SEPARATOR \ + movs r0, #0 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +1: SEPARATOR \ + movs r0, #1 SEPARATOR \ + pop { r4, pc } SEPARATOR \ +END_COMPILERRT_FUNCTION(__aeabi_fcmp ## cond) + +DEFINE_AEABI_FCMP(eq) +DEFINE_AEABI_FCMP(lt) +DEFINE_AEABI_FCMP(le) +DEFINE_AEABI_FCMP(ge) +DEFINE_AEABI_FCMP(gt) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_frsub.c b/wasmrt/llvm-builtins/builtins/arm/aeabi_frsub.c new file mode 100644 index 0000000..9a36324 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_frsub.c @@ -0,0 +1,14 @@ +//===-- lib/arm/aeabi_frsub.c - Single-precision subtraction --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SINGLE_PRECISION +#include "../fp_lib.h" + +AEABI_RTABI fp_t __aeabi_fsub(fp_t, fp_t); + +AEABI_RTABI fp_t __aeabi_frsub(fp_t a, fp_t b) { return __aeabi_fsub(b, a); } diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_idivmod.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_idivmod.S new file mode 100644 index 0000000..bb80e4b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_idivmod.S @@ -0,0 +1,50 @@ +//===-- aeabi_idivmod.S - EABI idivmod implementation ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int quot, int rem} __aeabi_idivmod(int numerator, int denominator) { +// int rem, quot; +// quot = __divmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_idivmod __rt_sdiv +#endif + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_idivmod) +#if defined(USE_THUMB_1) + push {r0, r1, lr} + bl SYMBOL_NAME(__divsi3) + pop {r1, r2, r3} // now r0 = quot, r1 = num, r2 = denom + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +#else // defined(USE_THUMB_1) + push { lr } + sub sp, sp, #4 + mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif + bl SYMBOL_NAME(__divmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +#endif // defined(USE_THUMB_1) +END_COMPILERRT_FUNCTION(__aeabi_idivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_ldivmod.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_ldivmod.S new file mode 100644 
index 0000000..d0d06be --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_ldivmod.S @@ -0,0 +1,45 @@ +//===-- aeabi_ldivmod.S - EABI ldivmod implementation ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { int64_t quot, int64_t rem} +// __aeabi_ldivmod(int64_t numerator, int64_t denominator) { +// int64_t rem, quot; +// quot = __divmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_ldivmod __rt_sdiv64 +#endif + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_ldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif + bl SYMBOL_NAME(__divmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_ldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_memcmp.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_memcmp.S new file mode 100644 index 0000000..4163728 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_memcmp.S @@ -0,0 +1,29 @@ +//===-- aeabi_memcmp.S - EABI memcmp implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// int __aeabi_memcmp(void *a, void *b, size_t n) { return memcmp(a, b, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcmp) +#ifdef USE_THUMB_1 + push {r7, lr} + bl memcmp + pop {r7, pc} +#else + b memcmp +#endif +END_COMPILERRT_FUNCTION(__aeabi_memcmp) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp4, __aeabi_memcmp) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcmp8, __aeabi_memcmp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_memcpy.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_memcpy.S new file mode 100644 index 0000000..93e1b05 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_memcpy.S @@ -0,0 +1,29 @@ +//===-- aeabi_memcpy.S - EABI memcpy implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memcpy(void *dest, void *src, size_t n) { memcpy(dest, src, n); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memcpy) +#ifdef USE_THUMB_1 + push {r7, lr} + bl memcpy + pop {r7, pc} +#else + b memcpy +#endif +END_COMPILERRT_FUNCTION(__aeabi_memcpy) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy4, __aeabi_memcpy) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memcpy8, __aeabi_memcpy) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_memmove.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_memmove.S new file mode 100644 index 0000000..c2f0fa4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_memmove.S @@ -0,0 +1,28 @@ +//===-- aeabi_memmove.S - EABI memmove implementation --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memmove(void *dest, void *src, size_t n) { memmove(dest, src, n); } + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memmove) +#ifdef USE_THUMB_1 + push {r7, lr} + bl memmove + pop {r7, pc} +#else + b memmove +#endif +END_COMPILERRT_FUNCTION(__aeabi_memmove) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove4, __aeabi_memmove) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memmove8, __aeabi_memmove) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_memset.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_memset.S new file mode 100644 index 0000000..2aa8ec0 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_memset.S @@ -0,0 +1,49 @@ +//===-- aeabi_memset.S - EABI memset implementation -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// void __aeabi_memset(void *dest, size_t n, int c) { memset(dest, c, n); } +// void __aeabi_memclr(void *dest, size_t n) { __aeabi_memset(dest, n, 0); } + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memset) + mov r3, r1 + mov r1, r2 + mov r2, r3 +#ifdef USE_THUMB_1 + push {r7, lr} + bl memset + pop {r7, pc} +#else + b memset +#endif +END_COMPILERRT_FUNCTION(__aeabi_memset) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset4, __aeabi_memset) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memset8, __aeabi_memset) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_memclr) + mov r2, r1 + movs r1, #0 +#ifdef USE_THUMB_1 + push {r7, lr} + bl memset + pop {r7, pc} +#else + b memset +#endif +END_COMPILERRT_FUNCTION(__aeabi_memclr) + +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr4, __aeabi_memclr) +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_memclr8, __aeabi_memclr) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_uidivmod.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_uidivmod.S new file mode 100644 index 0000000..df03076 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_uidivmod.S @@ -0,0 +1,57 @@ +//===-- aeabi_uidivmod.S - EABI uidivmod implementation -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { unsigned quot, unsigned rem} +// __aeabi_uidivmod(unsigned numerator, unsigned denominator) { +// unsigned rem, quot; +// quot = __udivmodsi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_uidivmod __rt_udiv +#endif + + .syntax unified + .text + DEFINE_CODE_STATE + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod) +#if defined(USE_THUMB_1) + cmp r0, r1 + bcc LOCAL_LABEL(case_denom_larger) + push {r0, r1, lr} + bl SYMBOL_NAME(__aeabi_uidiv) + pop {r1, r2, r3} + muls r2, r0, r2 // r2 = quot * denom + subs r1, r1, r2 + JMP (r3) +LOCAL_LABEL(case_denom_larger): + movs r1, r0 + movs r0, #0 + JMP (lr) +#else // defined(USE_THUMB_1) + push { lr } + sub sp, sp, #4 + mov r2, sp +#if defined(__MINGW32__) + mov r3, r0 + mov r0, r1 + mov r1, r3 +#endif + bl SYMBOL_NAME(__udivmodsi4) + ldr r1, [sp] + add sp, sp, #4 + pop { pc } +#endif +END_COMPILERRT_FUNCTION(__aeabi_uidivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/aeabi_uldivmod.S b/wasmrt/llvm-builtins/builtins/arm/aeabi_uldivmod.S new file mode 100644 index 0000000..4fc9770 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/aeabi_uldivmod.S @@ -0,0 +1,45 @@ +//===-- aeabi_uldivmod.S - EABI uldivmod implementation -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// struct { uint64_t quot, uint64_t rem} +// __aeabi_uldivmod(uint64_t numerator, uint64_t denominator) { +// uint64_t rem, quot; +// quot = __udivmoddi4(numerator, denominator, &rem); +// return {quot, rem}; +// } + +#if defined(__MINGW32__) +#define __aeabi_uldivmod __rt_udiv64 +#endif + + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__aeabi_uldivmod) + push {r6, lr} + sub sp, sp, #16 + add r6, sp, #8 + str r6, [sp] +#if defined(__MINGW32__) + movs r6, r0 + movs r0, r2 + movs r2, r6 + movs r6, r1 + movs r1, r3 + movs r3, r6 +#endif + bl SYMBOL_NAME(__udivmoddi4) + ldr r2, [sp, #8] + ldr r3, [sp, #12] + add sp, sp, #16 + pop {r6, pc} +END_COMPILERRT_FUNCTION(__aeabi_uldivmod) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/bswapdi2.S b/wasmrt/llvm-builtins/builtins/arm/bswapdi2.S new file mode 100644 index 0000000..271df8b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/bswapdi2.S @@ -0,0 +1,43 @@ +//===------- bswapdi2 - Implement bswapdi2 --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +// +// extern uint64_t __bswapdi2(uint64_t); +// +// Reverse all the bytes in a 64-bit integer. 
+// + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__bswapdi2) +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + // r2 = rev(r0) + eor r2, r0, r0, ror #16 + bic r2, r2, #0xff0000 + mov r2, r2, lsr #8 + eor r2, r2, r0, ror #8 + // r0 = rev(r1) + eor r0, r1, r1, ror #16 + bic r0, r0, #0xff0000 + mov r0, r0, lsr #8 + eor r0, r0, r1, ror #8 +#else + rev r2, r0 // r2 = rev(r0) + rev r0, r1 // r0 = rev(r1) +#endif + mov r1, r2 // r1 = r2 = rev(r0) + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapdi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/bswapsi2.S b/wasmrt/llvm-builtins/builtins/arm/bswapsi2.S new file mode 100644 index 0000000..07cc3d8 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/bswapsi2.S @@ -0,0 +1,35 @@ +//===------- bswapsi2 - Implement bswapsi2 --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +// +// extern uint32_t __bswapsi2(uint32_t); +// +// Reverse all the bytes in a 32-bit integer. +// + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__bswapsi2) +#if __ARM_ARCH < 6 + // before armv6 does not have "rev" instruction + eor r1, r0, r0, ror #16 + bic r1, r1, #0xff0000 + mov r1, r1, lsr #8 + eor r0, r1, r0, ror #8 +#else + rev r0, r0 +#endif + JMP(lr) +END_COMPILERRT_FUNCTION(__bswapsi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/chkstk.S b/wasmrt/llvm-builtins/builtins/arm/chkstk.S new file mode 100644 index 0000000..c5c9ebe --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/chkstk.S @@ -0,0 +1,35 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../assembly.h" + +// __chkstk routine +// This routine is windows specific. +// http://msdn.microsoft.com/en-us/library/ms648426.aspx + +// This clobbers the register r12, and the condition codes, and uses r5 and r6 +// as temporaries by backing them up and restoring them afterwards. +// Does not modify any memory or the stack pointer. + +// movw r4, #256 // Number of bytes of stack, in units of 4 byte +// bl __chkstk +// sub.w sp, sp, r4 + +#define PAGE_SIZE 4096 + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__chkstk) + lsl r4, r4, #2 + mov r12, sp + push {r5, r6} + mov r5, r4 +1: + sub r12, r12, #PAGE_SIZE + subs r5, r5, #PAGE_SIZE + ldr r6, [r12] + bgt 1b + + pop {r5, r6} + bx lr +END_COMPILERRT_FUNCTION(__chkstk) diff --git a/wasmrt/llvm-builtins/builtins/arm/clzdi2.S b/wasmrt/llvm-builtins/builtins/arm/clzdi2.S new file mode 100644 index 0000000..685668b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/clzdi2.S @@ -0,0 +1,86 @@ +//===-- clzdi2.c - Implement __clzdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements count leading zeros for 64bit arguments. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__clzdi2) +#ifdef __ARM_FEATURE_CLZ +#ifdef __ARMEB__ + cmp r0, 0 + itee ne + clzne r0, r0 + clzeq r0, r1 + addeq r0, r0, 32 +#else + cmp r1, 0 + itee ne + clzne r0, r1 + clzeq r0, r0 + addeq r0, r0, 32 +#endif + JMP(lr) +#else + // Assumption: n != 0 + + // r0: n + // r1: upper half of n, overwritten after check + // r1: count of leading zeros in n + 1 + // r2: scratch register for shifted r0 +#ifdef __ARMEB__ + cmp r0, 0 + moveq r0, r1 +#else + cmp r1, 0 + movne r0, r1 +#endif + movne r1, 1 + moveq r1, 33 + + // Basic block: + // if ((r0 >> SHIFT) == 0) + // r1 += SHIFT; + // else + // r0 >>= SHIFT; + // for descending powers of two as SHIFT. +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + // The basic block invariants at this point are (r0 >> 2) == 0 and + // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + // + // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + // ---+----------------+----------------+------------+-------------- + // 1 | 1 | 0 | 0 | 1 + // 2 | 0 | 1 | -1 | 0 + // 3 | 0 | 1 | -1 | 0 + // + // The r1's initial value of 1 compensates for the 1 here. + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzdi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/clzsi2.S b/wasmrt/llvm-builtins/builtins/arm/clzsi2.S new file mode 100644 index 0000000..5d86fe4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/clzsi2.S @@ -0,0 +1,66 @@ +//===-- clzsi2.c - Implement __clzsi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements count leading zeros for 32bit arguments. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__clzsi2) +#ifdef __ARM_FEATURE_CLZ + clz r0, r0 + JMP(lr) +#else + // Assumption: n != 0 + + // r0: n + // r1: count of leading zeros in n + 1 + // r2: scratch register for shifted r0 + mov r1, 1 + + // Basic block: + // if ((r0 >> SHIFT) == 0) + // r1 += SHIFT; + // else + // r0 >>= SHIFT; + // for descending powers of two as SHIFT. + +#define BLOCK(shift) \ + lsrs r2, r0, shift; \ + movne r0, r2; \ + addeq r1, shift \ + + BLOCK(16) + BLOCK(8) + BLOCK(4) + BLOCK(2) + + // The basic block invariants at this point are (r0 >> 2) == 0 and + // r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1. + // + // r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1) + // ---+----------------+----------------+------------+-------------- + // 1 | 1 | 0 | 0 | 1 + // 2 | 0 | 1 | -1 | 0 + // 3 | 0 | 1 | -1 | 0 + // + // The r1's initial value of 1 compensates for the 1 here. + sub r0, r1, r0, lsr #1 + + JMP(lr) +#endif // __ARM_FEATURE_CLZ +END_COMPILERRT_FUNCTION(__clzsi2) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/comparesf2.S b/wasmrt/llvm-builtins/builtins/arm/comparesf2.S new file mode 100644 index 0000000..24b85d2 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/comparesf2.S @@ -0,0 +1,261 @@ +//===-- comparesf2.S - Implement single-precision soft-float comparisons --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the following soft-float comparison routines: +// +// __eqsf2 __gesf2 __unordsf2 +// __lesf2 __gtsf2 +// __ltsf2 +// __nesf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, with multiple names. +// +// The routines behave as follows: +// +// __lesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gesf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unordsf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of +// NaN values. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + + .macro COMPARESF2_FUNCTION_BODY handle_nan:req +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif + // Make copies of a and b with the sign bit shifted off the top. These will + // be used to detect zeros and NaNs. +#if defined(USE_THUMB_1) + push {r6, lr} + lsls r2, r0, #1 + lsls r3, r1, #1 +#else + mov r2, r0, lsl #1 + mov r3, r1, lsl #1 +#endif + + // We do the comparison in three stages (ignoring NaN values for the time + // being). First, we orr the absolute values of a and b; this sets the Z + // flag if both a and b are zero (of either sign). The shift of r3 doesn't + // affect this at all, but it *does* make sure that the C flag is clear for + // the subsequent operations. +#if defined(USE_THUMB_1) + lsrs r6, r3, #1 + orrs r6, r2 +#else + orrs r12, r2, r3, lsr #1 +#endif + // Next, we check if a and b have the same or different signs. If they have + // opposite signs, this eor will set the N flag.
+#if defined(USE_THUMB_1) + beq 1f + movs r6, r0 + eors r6, r1 +1: +#else + it ne + eorsne r12, r0, r1 +#endif + + // If a and b are equal (either both zeros or bit identical; again, we're + // ignoring NaNs for now), this subtract will zero out r0. If they have the + // same sign, the flags are updated as they would be for a comparison of the + // absolute values of a and b. +#if defined(USE_THUMB_1) + bmi 1f + subs r0, r2, r3 +1: +#else + it pl + subspl r0, r2, r3 +#endif + + // If a is smaller in magnitude than b and both have the same sign, place + // the negation of the sign of b in r0. Thus, if both are negative and + // a > b, this sets r0 to 0; if both are positive and a < b, this sets + // r0 to -1. + // + // This is also done if a and b have opposite signs and are not both zero, + // because in that case the subtract was not performed and the C flag is + // still clear from the shift argument in orrs; if a is positive and b + // negative, this places 0 in r0; if a is negative and b positive, -1 is + // placed in r0. +#if defined(USE_THUMB_1) + bhs 1f + // Here if a and b have the same sign and absA < absB, the result is thus + // b < 0 ? 1 : -1. Same if a and b have the opposite sign (ignoring Nan). + movs r0, #1 + lsrs r1, #31 + bne LOCAL_LABEL(CHECK_NAN\@) + negs r0, r0 + b LOCAL_LABEL(CHECK_NAN\@) +1: +#else + it lo + mvnlo r0, r1, asr #31 +#endif + + // If a is greater in magnitude than b and both have the same sign, place + // the sign of b in r0. Thus, if both are negative and a < b, -1 is placed + // in r0, which is the desired result. Conversely, if both are positive + // and a > b, zero is placed in r0. +#if defined(USE_THUMB_1) + bls 1f + // Here both have the same sign and absA > absB. + movs r0, #1 + lsrs r1, #31 + beq LOCAL_LABEL(CHECK_NAN\@) + negs r0, r0 +1: +#else + it hi + movhi r0, r1, asr #31 +#endif + + // If you've been keeping track, at this point r0 contains -1 if a < b and + // 0 if a >= b. 
All that remains to be done is to set it to 1 if a > b. + // If a == b, then the Z flag is set, so we can get the correct final value + // into r0 by simply or'ing with 1 if Z is clear. + // For Thumb-1, r0 contains -1 if a < b, 0 if a > b and 0 if a == b. +#if !defined(USE_THUMB_1) + it ne + orrne r0, r0, #1 +#endif + + // Finally, we need to deal with NaNs. If either argument is NaN, replace + // the value in r0 with 1. +#if defined(USE_THUMB_1) +LOCAL_LABEL(CHECK_NAN\@): + movs r6, #0xff + lsls r6, #24 + cmp r2, r6 + bhi 1f + cmp r3, r6 +1: + bls 2f + \handle_nan +2: + pop {r6, pc} +#else + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + \handle_nan + JMP(lr) +#endif + .endm + +@ int __eqsf2(float a, float b) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__eqsf2) + + .macro __eqsf2_handle_nan +#if defined(USE_THUMB_1) + movs r0, #1 +#else + movhi r0, #1 +#endif + .endm + +COMPARESF2_FUNCTION_BODY __eqsf2_handle_nan + +END_COMPILERRT_FUNCTION(__eqsf2) + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __eqsf2) +DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __eqsf2) + +#if defined(__ELF__) +// Alias for libgcc compatibility +DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __lesf2) +#endif + +@ int __gtsf2(float a, float b) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__gtsf2) + + .macro __gtsf2_handle_nan +#if defined(USE_THUMB_1) + movs r0, #1 + negs r0, r0 +#else + movhi r0, #-1 +#endif + .endm + +COMPARESF2_FUNCTION_BODY __gtsf2_handle_nan + +END_COMPILERRT_FUNCTION(__gtsf2) + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __gtsf2) + +@ int __unordsf2(float a, float b) + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2) + +#if defined(COMPILER_RT_ARMHF_TARGET) + vmov r0, s0 + vmov r1, s1 +#endif + // Return 1 for NaN values, 0 otherwise. 
+ lsls r2, r0, #1 + lsls r3, r1, #1 + movs r0, #0 +#if defined(USE_THUMB_1) + movs r1, #0xff + lsls r1, #24 + cmp r2, r1 + bhi 1f + cmp r3, r1 +1: + bls 2f + movs r0, #1 +2: +#else + cmp r2, #0xff000000 + ite ls + cmpls r3, #0xff000000 + movhi r0, #1 +#endif + JMP(lr) +END_COMPILERRT_FUNCTION(__unordsf2) + +#if defined(COMPILER_RT_ARMHF_TARGET) +DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun) + vmov s0, r0 + vmov s1, r1 + b SYMBOL_NAME(__unordsf2) +END_COMPILERRT_FUNCTION(__aeabi_fcmpun) +#else +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fcmpun, __unordsf2) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/divdf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/divdf3vfp.S new file mode 100644 index 0000000..ad50b57 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/divdf3vfp.S @@ -0,0 +1,32 @@ +//===-- divdf3vfp.S - Implement divdf3vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __divdf3vfp(double a, double b); +// +// Divides two double precision floating point numbers using the Darwin +// calling convention where double arguments are passed in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__divdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vdiv.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vdiv.f64 d5, d6, d7 + vmov r0, r1, d5 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__divdf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/divmodsi4.S b/wasmrt/llvm-builtins/builtins/arm/divmodsi4.S new file mode 100644 index 0000000..f94438d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/divmodsi4.S @@ -0,0 +1,70 @@ +//===-- divmodsi4.S - 32-bit signed integer divide and modulus ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __divmodsi4 (32-bit signed integer divide and +// modulus) function for the ARM architecture. A naive digit-by-digit +// computation is employed for simplicity.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push {r4-r7, lr} ;\
+    add r7, sp, #12
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r4-r7, pc}
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+
+@ int __divmodsi4(int dividend, int divisor, int *remainder)
+@ Calculate the quotient and remainder of the (signed) division. The return
+@ value is the quotient, the remainder is placed in the variable.
+
+        .p2align 3
+DEFINE_COMPILERRT_FUNCTION(__divmodsi4)
+#if __ARM_ARCH_EXT_IDIV__
+        tst r1, r1  // divisor == 0 ?
+        beq LOCAL_LABEL(divzero)
+        mov r3, r0  // keep the dividend; r0 is overwritten by the quotient
+        sdiv r0, r3, r1
+        mls r1, r0, r1, r3  // remainder = dividend - quotient * divisor
+        str r1, [r2]  // store the remainder through the pointer argument
+        bx lr
+LOCAL_LABEL(divzero):
+        mov r0, #0  // division by zero: return 0, *remainder is not written
+        bx lr
+#else
+        ESTABLISH_FRAME
+// Set aside the sign of the quotient and modulus, and the address for the
+// modulus.
+        eor r4, r0, r1
+        mov r5, r0
+        mov r6, r2
+// Take the absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+        eor ip, r0, r0, asr #31
+        eor lr, r1, r1, asr #31
+        sub r0, ip, r0, asr #31
+        sub r1, lr, r1, asr #31
+// Unsigned divmod (r2 still holds the remainder pointer):
+        bl SYMBOL_NAME(__udivmodsi4)
+// Apply the sign of quotient (from r4) and modulus (from r5, the dividend)
+        ldr r1, [r6]
+        eor r0, r0, r4, asr #31
+        eor r1, r1, r5, asr #31
+        sub r0, r0, r4, asr #31
+        sub r1, r1, r5, asr #31
+        str r1, [r6]
+        CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divmodsi4)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/divsf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/divsf3vfp.S
new file mode 100644
index 0000000..958a672
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/divsf3vfp.S
@@ -0,0 +1,32 @@
+//===-- divsf3vfp.S - Implement divsf3vfp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __divsf3vfp(float a, float b);
+//
+// Divides two single precision floating point numbers using the Darwin
+// calling convention where single arguments are passed like 32-bit ints.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__divsf3vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vdiv.f32 s0, s0, s1  // ARMHF build: a/b taken from s0/s1, quotient left in s0
+#else
+        vmov s14, r0  // move first param from r0 into float register
+        vmov s15, r1  // move second param from r1 into float register
+        vdiv.f32 s13, s14, s15  // s13 = a / b
+        vmov r0, s13  // move result back to r0
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__divsf3vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/divsi3.S b/wasmrt/llvm-builtins/builtins/arm/divsi3.S
new file mode 100644
index 0000000..faf9af9
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/divsi3.S
@@ -0,0 +1,82 @@
+//===-- divsi3.S - 32-bit signed integer divide ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the __divsi3 (32-bit signed integer divide) function
+// for the ARM architecture as a wrapper around the unsigned routine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+#define ESTABLISH_FRAME \
+    push {r4, r7, lr} ;\
+    add r7, sp, #4
+#define CLEAR_FRAME_AND_RETURN \
+    pop {r4, r7, pc}
+
+        .syntax unified
+        .text
+        DEFINE_CODE_STATE
+
+        .p2align 3
+// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
+DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_idiv, __divsi3)
+
+@ int __divsi3(int dividend, int divisor)
+@ Calculate and return the quotient of the (signed) division.
+
+DEFINE_COMPILERRT_FUNCTION(__divsi3)
+#if __ARM_ARCH_EXT_IDIV__
+        tst r1,r1  // divisor == 0 ?
+        beq LOCAL_LABEL(divzero)
+        sdiv r0, r0, r1
+        bx lr
+LOCAL_LABEL(divzero):
+        // Division by zero returns 0. Use movs for compatibility with v8-m.base.
+        movs r0,#0
+        bx lr
+#else
+ESTABLISH_FRAME
+// Set aside the sign of the quotient (sign bit of r4 = sign(a) ^ sign(b)).
+# if defined(USE_THUMB_1)
+        movs r4, r0
+        eors r4, r1
+# else
+        eor r4, r0, r1
+# endif
+// Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31).
+# if defined(USE_THUMB_1)
+        asrs r2, r0, #31
+        asrs r3, r1, #31
+        eors r0, r2
+        eors r1, r3
+        subs r0, r0, r2
+        subs r1, r1, r3
+# else
+        eor r2, r0, r0, asr #31
+        eor r3, r1, r1, asr #31
+        sub r0, r2, r0, asr #31
+        sub r1, r3, r1, asr #31
+# endif
+// abs(a) / abs(b)
+        bl SYMBOL_NAME(__udivsi3)
+// Apply sign of quotient to result and return.
+# if defined(USE_THUMB_1)
+        asrs r4, #31
+        eors r0, r4
+        subs r0, r0, r4
+# else
+        eor r0, r0, r4, asr #31
+        sub r0, r0, r4, asr #31
+# endif
+        CLEAR_FRAME_AND_RETURN
+#endif
+END_COMPILERRT_FUNCTION(__divsi3)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/eqdf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/eqdf2vfp.S
new file mode 100644
index 0000000..2a0a64b
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/eqdf2vfp.S
@@ -0,0 +1,35 @@
+//===-- eqdf2vfp.S - Implement eqdf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+// extern int __eqdf2vfp(double a, double b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f64 d0, d1  // ARMHF build: compare the operands directly in d0/d1
+#else
+        vmov d6, r0, r1  // load r0/r1 pair in double register
+        vmov d7, r2, r3  // load r2/r3 pair in double register
+        vcmp.f64 d6, d7
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(eq)
+        moveq r0, #1  // set result register to 1 if equal
+        movne r0, #0  // otherwise (unequal or unordered) return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__eqdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/eqsf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/eqsf2vfp.S
new file mode 100644
index 0000000..5fefe7b
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/eqsf2vfp.S
@@ -0,0 +1,36 @@
+//===-- eqsf2vfp.S - Implement eqsf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __eqsf2vfp(float a, float b);
+//
+// Returns one iff a == b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f32 s0, s1  // ARMHF build: compare the operands directly in s0/s1
+#else
+        vmov s14, r0  // move from GPR 0 to float register
+        vmov s15, r1  // move from GPR 1 to float register
+        vcmp.f32 s14, s15
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(eq)
+        moveq r0, #1  // set result register to 1 if equal
+        movne r0, #0  // otherwise (unequal or unordered) return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__eqsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/extendsfdf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/extendsfdf2vfp.S
new file mode 100644
index 0000000..37c8be8
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/extendsfdf2vfp.S
@@ -0,0 +1,32 @@
+//===-- extendsfdf2vfp.S - Implement extendsfdf2vfp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __extendsfdf2vfp(float a);
+//
+// Converts single precision float to double precision result.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR and a double precision result is returned in R0/R1 pair.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcvt.f64.f32 d0, s0  // ARMHF build: widen s0 into d0, no GPR transfer
+#else
+        vmov s15, r0  // load float register from R0
+        vcvt.f64.f32 d7, s15  // convert single to double
+        vmov r0, r1, d7  // return result in r0/r1 pair
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__extendsfdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/fixdfsivfp.S b/wasmrt/llvm-builtins/builtins/arm/fixdfsivfp.S
new file mode 100644
index 0000000..af1d4f4
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/fixdfsivfp.S
@@ -0,0 +1,33 @@
+//===-- fixdfsivfp.S - Implement fixdfsivfp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in GPR register pair.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcvt.s32.f64 s0, d0  // ARMHF build: argument arrives in d0
+        vmov r0, s0  // the integer result is returned in r0
+#else
+        vmov d7, r0, r1  // load double register from R0/R1
+        vcvt.s32.f64 s15, d7  // convert double to 32-bit int into s15
+        vmov r0, s15  // move s15 to result register
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__fixdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/fixsfsivfp.S b/wasmrt/llvm-builtins/builtins/arm/fixsfsivfp.S
new file mode 100644
index 0000000..30b2f3c
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/fixsfsivfp.S
@@ -0,0 +1,33 @@
+//===-- fixsfsivfp.S - Implement fixsfsivfp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __fixsfsivfp(float a);
+//
+// Converts single precision float to a 32-bit int rounding towards zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixsfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcvt.s32.f32 s0, s0  // ARMHF build: convert the argument in place in s0
+        vmov r0, s0  // the integer result is returned in r0
+#else
+        vmov s15, r0  // load float register from R0
+        vcvt.s32.f32 s15, s15  // convert single to 32-bit int into s15
+        vmov r0, s15  // move s15 to result register
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__fixsfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/fixunsdfsivfp.S b/wasmrt/llvm-builtins/builtins/arm/fixunsdfsivfp.S
new file mode 100644
index 0000000..44e6dbd
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/fixunsdfsivfp.S
@@ -0,0 +1,34 @@
+//===-- fixunsdfsivfp.S - Implement fixunsdfsivfp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunsdfsivfp(double a);
+//
+// Converts double precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a double precision parameter is
+// passed in GPR register pair.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcvt.u32.f64 s0, d0  // ARMHF build: argument arrives in d0
+        vmov r0, s0  // the integer result is returned in r0
+#else
+        vmov d7, r0, r1  // load double register from R0/R1
+        vcvt.u32.f64 s15, d7  // convert double to 32-bit unsigned int into s15
+        vmov r0, s15  // move s15 to result register
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__fixunsdfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/fixunssfsivfp.S b/wasmrt/llvm-builtins/builtins/arm/fixunssfsivfp.S
new file mode 100644
index 0000000..5d6ee7c
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/fixunssfsivfp.S
@@ -0,0 +1,34 @@
+//===-- fixunssfsivfp.S - Implement fixunssfsivfp -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern unsigned int __fixunssfsivfp(float a);
+//
+// Converts single precision float to a 32-bit unsigned int rounding towards
+// zero. All negative values become zero.
+// Uses Darwin calling convention where a single precision parameter is
+// passed in a GPR.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__fixunssfsivfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcvt.u32.f32 s0, s0  // ARMHF build: convert the argument in place in s0
+        vmov r0, s0  // the integer result is returned in r0
+#else
+        vmov s15, r0  // load float register from R0
+        vcvt.u32.f32 s15, s15  // convert single to 32-bit unsigned into s15
+        vmov r0, s15  // move s15 to result register
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__fixunssfsivfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/floatsidfvfp.S b/wasmrt/llvm-builtins/builtins/arm/floatsidfvfp.S
new file mode 100644
index 0000000..ae8d246
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/floatsidfvfp.S
@@ -0,0 +1,33 @@
+//===-- floatsidfvfp.S - Implement floatsidfvfp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatsidfvfp(int a);
+//
+// Converts a 32-bit int to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// returned in a GPR register pair.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vmov s0, r0  // ARMHF build: the int argument still arrives in r0
+        vcvt.f64.s32 d0, s0  // the double result is left in d0
+#else
+        vmov s15, r0  // move int to float register s15
+        vcvt.f64.s32 d7, s15  // convert 32-bit int in s15 to double in d7
+        vmov r0, r1, d7  // move d7 to result register pair r0/r1
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__floatsidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/floatsisfvfp.S b/wasmrt/llvm-builtins/builtins/arm/floatsisfvfp.S
new file mode 100644
index 0000000..a36bc5e
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/floatsisfvfp.S
@@ -0,0 +1,33 @@
+//===-- floatsisfvfp.S - Implement floatsisfvfp ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatsisfvfp(int a);
+//
+// Converts a 32-bit int to a single precision float.
+// Uses Darwin calling convention where a single precision result is
+// returned in a GPR.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatsisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vmov s0, r0  // ARMHF build: the int argument still arrives in r0
+        vcvt.f32.s32 s0, s0  // the float result is left in s0
+#else
+        vmov s15, r0  // move int to float register s15
+        vcvt.f32.s32 s15, s15  // convert 32-bit int in s15 to float in s15
+        vmov r0, s15  // move s15 to result register
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__floatsisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/floatunssidfvfp.S b/wasmrt/llvm-builtins/builtins/arm/floatunssidfvfp.S
new file mode 100644
index 0000000..0932dab
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/floatunssidfvfp.S
@@ -0,0 +1,33 @@
+//===-- floatunssidfvfp.S - Implement floatunssidfvfp ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern double __floatunssidfvfp(unsigned int a);
+//
+// Converts a 32-bit unsigned int to a double precision float.
+// Uses Darwin calling convention where a double precision result is
+// returned in a GPR register pair.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssidfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vmov s0, r0  // ARMHF build: the int argument still arrives in r0
+        vcvt.f64.u32 d0, s0  // the double result is left in d0
+#else
+        vmov s15, r0  // move int to float register s15
+        vcvt.f64.u32 d7, s15  // convert 32-bit unsigned int in s15 to double in d7
+        vmov r0, r1, d7  // move d7 to result register pair r0/r1
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__floatunssidfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/floatunssisfvfp.S b/wasmrt/llvm-builtins/builtins/arm/floatunssisfvfp.S
new file mode 100644
index 0000000..9578546
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/floatunssisfvfp.S
@@ -0,0 +1,33 @@
+//===-- floatunssisfvfp.S - Implement floatunssisfvfp ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern float __floatunssisfvfp(unsigned int a);
+//
+// Converts a 32-bit unsigned int to a single precision float.
+// Uses Darwin calling convention where a single precision result is
+// returned in a GPR.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__floatunssisfvfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vmov s0, r0  // ARMHF build: the int argument still arrives in r0
+        vcvt.f32.u32 s0, s0  // the float result is left in s0
+#else
+        vmov s15, r0  // move int to float register s15
+        vcvt.f32.u32 s15, s15  // convert 32-bit unsigned int in s15 to float in s15
+        vmov r0, s15  // move s15 to result register
+#endif
+        bx lr
+END_COMPILERRT_FUNCTION(__floatunssisfvfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/fp_mode.c b/wasmrt/llvm-builtins/builtins/arm/fp_mode.c
new file mode 100644
index 0000000..064f4e9
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/fp_mode.c
@@ -0,0 +1,60 @@
+//===----- lib/arm/fp_mode.c - Floating-point mode utilities ------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdint.h>
+
+#include "../fp_mode.h"
+
+#define ARM_TONEAREST 0x0
+#define ARM_UPWARD 0x1
+#define ARM_DOWNWARD 0x2
+#define ARM_TOWARDZERO 0x3
+#define ARM_RMODE_MASK (ARM_TONEAREST | ARM_UPWARD | \
+                        ARM_DOWNWARD | ARM_TOWARDZERO)
+#define ARM_RMODE_SHIFT 22
+
+#define ARM_INEXACT 0x10
+
+#ifndef __ARM_FP
+// For soft float targets, allow changing rounding mode by overriding the weak
+// __arm_fe_default_rmode symbol.
+CRT_FE_ROUND_MODE __attribute__((weak)) __arm_fe_default_rmode =
+    CRT_FE_TONEAREST;
+#endif
+
+CRT_FE_ROUND_MODE __fe_getround(void) { // report the current rounding mode
+#ifdef __ARM_FP
+  uint32_t fpscr;
+  __asm__ __volatile__("vmrs %0, fpscr" : "=r" (fpscr)); // read FPSCR
+  fpscr = fpscr >> ARM_RMODE_SHIFT & ARM_RMODE_MASK; // keep the 2-bit field at bit 22
+  switch (fpscr) {
+    case ARM_UPWARD:
+      return CRT_FE_UPWARD;
+    case ARM_DOWNWARD:
+      return CRT_FE_DOWNWARD;
+    case ARM_TOWARDZERO:
+      return CRT_FE_TOWARDZERO;
+    case ARM_TONEAREST:
+    default:
+      return CRT_FE_TONEAREST;
+  }
+#else
+  return __arm_fe_default_rmode; // no __ARM_FP: use the overridable weak default
+#endif
+}
+
+int __fe_raise_inexact(void) { // sets the ARM_INEXACT bit in FPSCR; always returns 0
+#ifdef __ARM_FP
+  uint32_t fpscr;
+  __asm__ __volatile__("vmrs %0, fpscr" : "=r" (fpscr));
+  __asm__ __volatile__("vmsr fpscr, %0" : : "ri" (fpscr | ARM_INEXACT)); // write back with 0x10 set
+  return 0;
+#else
+  return 0; // no __ARM_FP: nothing is written
+#endif
+}
diff --git a/wasmrt/llvm-builtins/builtins/arm/gedf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/gedf2vfp.S
new file mode 100644
index 0000000..2af9d90
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/gedf2vfp.S
@@ -0,0 +1,36 @@
+//===-- gedf2vfp.S - Implement gedf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gedf2vfp(double a, double b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gedf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f64 d0, d1  // ARMHF build: compare the operands directly in d0/d1
+#else
+        vmov d6, r0, r1  // load r0/r1 pair in double register
+        vmov d7, r2, r3  // load r2/r3 pair in double register
+        vcmp.f64 d6, d7
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(ge)
+        movge r0, #1  // set result register to 1 if greater than or equal
+        movlt r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__gedf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/gesf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/gesf2vfp.S
new file mode 100644
index 0000000..cedd1e1
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/gesf2vfp.S
@@ -0,0 +1,36 @@
+//===-- gesf2vfp.S - Implement gesf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gesf2vfp(float a, float b);
+//
+// Returns one iff a >= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f32 s0, s1  // ARMHF build: compare the operands directly in s0/s1
+#else
+        vmov s14, r0  // move from GPR 0 to float register
+        vmov s15, r1  // move from GPR 1 to float register
+        vcmp.f32 s14, s15
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(ge)
+        movge r0, #1  // set result register to 1 if greater than or equal
+        movlt r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__gesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/gtdf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/gtdf2vfp.S
new file mode 100644
index 0000000..782ad8c
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/gtdf2vfp.S
@@ -0,0 +1,36 @@
+//===-- gtdf2vfp.S - Implement gtdf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtdf2vfp(double a, double b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f64 d0, d1  // ARMHF build: compare the operands directly in d0/d1
+#else
+        vmov d6, r0, r1  // load r0/r1 pair in double register
+        vmov d7, r2, r3  // load r2/r3 pair in double register
+        vcmp.f64 d6, d7
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(gt)
+        movgt r0, #1  // set result register to 1 if greater than
+        movle r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__gtdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/gtsf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/gtsf2vfp.S
new file mode 100644
index 0000000..1cc2bd1
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/gtsf2vfp.S
@@ -0,0 +1,36 @@
+//===-- gtsf2vfp.S - Implement gtsf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __gtsf2vfp(float a, float b);
+//
+// Returns one iff a > b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f32 s0, s1  // ARMHF build: compare the operands directly in s0/s1
+#else
+        vmov s14, r0  // move from GPR 0 to float register
+        vmov s15, r1  // move from GPR 1 to float register
+        vcmp.f32 s14, s15
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(gt)
+        movgt r0, #1  // set result register to 1 if greater than
+        movle r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__gtsf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/ledf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/ledf2vfp.S
new file mode 100644
index 0000000..0097e4b
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/ledf2vfp.S
@@ -0,0 +1,36 @@
+//===-- ledf2vfp.S - Implement ledf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ledf2vfp(double a, double b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ledf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f64 d0, d1  // ARMHF build: compare the operands directly in d0/d1
+#else
+        vmov d6, r0, r1  // load r0/r1 pair in double register
+        vmov d7, r2, r3  // load r2/r3 pair in double register
+        vcmp.f64 d6, d7
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(ls)
+        movls r0, #1  // set result register to 1 if less than or equal
+        movhi r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__ledf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/lesf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/lesf2vfp.S
new file mode 100644
index 0000000..2052d38
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/lesf2vfp.S
@@ -0,0 +1,36 @@
+//===-- lesf2vfp.S - Implement lesf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __lesf2vfp(float a, float b);
+//
+// Returns one iff a <= b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passed
+// like 32-bit ints
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__lesf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f32 s0, s1  // ARMHF build: compare the operands directly in s0/s1
+#else
+        vmov s14, r0  // move from GPR 0 to float register
+        vmov s15, r1  // move from GPR 1 to float register
+        vcmp.f32 s14, s15
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(ls)
+        movls r0, #1  // set result register to 1 if less than or equal
+        movhi r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__lesf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/ltdf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/ltdf2vfp.S
new file mode 100644
index 0000000..a126aa9
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/ltdf2vfp.S
@@ -0,0 +1,36 @@
+//===-- ltdf2vfp.S - Implement ltdf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltdf2vfp(double a, double b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where double precision arguments are passed
+// in GPR pairs.
+//
+        .syntax unified
+        .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+        vcmp.f64 d0, d1  // ARMHF build: compare the operands directly in d0/d1
+#else
+        vmov d6, r0, r1  // load r0/r1 pair in double register
+        vmov d7, r2, r3  // load r2/r3 pair in double register
+        vcmp.f64 d6, d7
+#endif
+        vmrs apsr_nzcv, fpscr  // copy the VFP compare flags into the APSR
+        ITE(mi)
+        movmi r0, #1  // set result register to 1 if less than
+        movpl r0, #0  // otherwise return 0
+        bx lr
+END_COMPILERRT_FUNCTION(__ltdf2vfp)
+
+NO_EXEC_STACK_DIRECTIVE
+
diff --git a/wasmrt/llvm-builtins/builtins/arm/ltsf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/ltsf2vfp.S
new file mode 100644
index 0000000..ba10d71
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/arm/ltsf2vfp.S
@@ -0,0 +1,36 @@
+//===-- ltsf2vfp.S - Implement ltsf2vfp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+//
+// extern int __ltsf2vfp(float a, float b);
+//
+// Returns one iff a < b and neither is NaN.
+// Uses Darwin calling convention where single precision arguments are passsed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(mi) + movmi r0, #1 // set result register to 1 if equal + movpl r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__ltsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/modsi3.S b/wasmrt/llvm-builtins/builtins/arm/modsi3.S new file mode 100644 index 0000000..5312f5b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/modsi3.S @@ -0,0 +1,59 @@ +//===-- modsi3.S - 32-bit signed integer modulus --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __modsi3 (32-bit signed integer modulus) function +// for the ARM architecture as a wrapper around the unsigned routine. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#define ESTABLISH_FRAME \ + push {r4, r7, lr} ;\ + add r7, sp, #4 +#define CLEAR_FRAME_AND_RETURN \ + pop {r4, r7, pc} + + .syntax unified + .text + DEFINE_CODE_STATE + +@ int __modsi3(int divident, int divisor) +@ Calculate and return the remainder of the (signed) division. + + .p2align 3 +DEFINE_COMPILERRT_FUNCTION(__modsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divzero) + sdiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +LOCAL_LABEL(divzero): + mov r0, #0 + bx lr +#else + ESTABLISH_FRAME + // Set aside the sign of the dividend. 
+ mov r4, r0 + // Take absolute value of a and b via abs(x) = (x^(x >> 31)) - (x >> 31). + eor r2, r0, r0, asr #31 + eor r3, r1, r1, asr #31 + sub r0, r2, r0, asr #31 + sub r1, r3, r1, asr #31 + // abs(a) % abs(b) + bl SYMBOL_NAME(__umodsi3) + // Apply sign of dividend to result and return. + eor r0, r0, r4, asr #31 + sub r0, r0, r4, asr #31 + CLEAR_FRAME_AND_RETURN +#endif +END_COMPILERRT_FUNCTION(__modsi3) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/muldf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/muldf3vfp.S new file mode 100644 index 0000000..9adc937 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/muldf3vfp.S @@ -0,0 +1,32 @@ +//===-- muldf3vfp.S - Implement muldf3vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __muldf3vfp(double a, double b); +// +// Multiplies two double precision floating point numbers using the Darwin +// calling convention where double arguments are passed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__muldf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmul.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vmul.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__muldf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/mulsf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/mulsf3vfp.S new file mode 100644 index 0000000..a94131b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/mulsf3vfp.S @@ -0,0 +1,32 @@ +//===-- mulsf3vfp.S - Implement mulsf3vfp 
---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __mulsf3vfp(float a, float b); +// +// Multiplies two single precision floating point numbers using the Darwin +// calling convention where single arguments are passed like 32-bit ints. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__mulsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vmul.f32 s0, s0, s1 +#else + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vmul.f32 s13, s14, s15 + vmov r0, s13 // move result back to r0 (soft-float path only) +#endif + bx lr +END_COMPILERRT_FUNCTION(__mulsf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/nedf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/nedf2vfp.S new file mode 100644 index 0000000..32d35c4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/nedf2vfp.S @@ -0,0 +1,35 @@ +//===-- nedf2vfp.S - Implement nedf2vfp -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// extern int __nedf2vfp(double a, double b); +// +// Returns one if a and b are unequal, zero if they are equal. +// Uses Darwin calling convention where double precision arguments are passed +// in GPR pairs. 
+ + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr + ITE(ne) + movne r0, #1 // set result register to 1 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nedf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/negdf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/negdf2vfp.S new file mode 100644 index 0000000..b7cf918 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/negdf2vfp.S @@ -0,0 +1,29 @@ +//===-- negdf2vfp.S - Implement negdf2vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __negdf2vfp(double a); +// +// Returns the negation of a double precision floating point number using the +// Darwin calling convention where double arguments are passed in GPR pairs. +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__negdf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vneg.f64 d0, d0 +#else + eor r1, r1, #-2147483648 // flip sign bit on double in r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__negdf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/negsf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/negsf2vfp.S new file mode 100644 index 0000000..b6d3c61 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/negsf2vfp.S @@ -0,0 +1,29 @@ +//===-- negsf2vfp.S - Implement negsf2vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __negsf2vfp(float a); +// +// Returns the negation of a single precision floating point number using the +// Darwin calling convention where single arguments are passed like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__negsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vneg.f32 s0, s0 +#else + eor r0, r0, #-2147483648 // flip sign bit on float in r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__negsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/nesf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/nesf2vfp.S new file mode 100644 index 0000000..34c8bb4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/nesf2vfp.S @@ -0,0 +1,36 @@ +//===-- nesf2vfp.S - Implement nesf2vfp -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __nesf2vfp(float a, float b); +// +// Returns one iff a != b and neither is NaN. 
+// Uses Darwin calling convention where single precision arguments are passed +// like 32-bit ints +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr + ITE(ne) + movne r0, #1 // set result register to 1 if unequal + moveq r0, #0 + bx lr +END_COMPILERRT_FUNCTION(__nesf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/restore_vfp_d8_d15_regs.S b/wasmrt/llvm-builtins/builtins/arm/restore_vfp_d8_d15_regs.S new file mode 100644 index 0000000..fd6d59b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/restore_vfp_d8_d15_regs.S @@ -0,0 +1,34 @@ +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. 
+// + + .text + .syntax unified + +// +// Restore registers d8-d15 from stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__restore_vfp_d8_d15_regs) + vldmia sp!, {d8-d15} // pop registers d8-d15 off stack + bx lr // return to epilog +END_COMPILERRT_FUNCTION(__restore_vfp_d8_d15_regs) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/save_vfp_d8_d15_regs.S b/wasmrt/llvm-builtins/builtins/arm/save_vfp_d8_d15_regs.S new file mode 100644 index 0000000..5eb3a2f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/save_vfp_d8_d15_regs.S @@ -0,0 +1,34 @@ +//===-- save_restore_regs.S - Implement save/restore* ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling C++ functions that need to handle thrown exceptions the +// compiler is required to save all registers and call __Unwind_SjLj_Register +// in the function prolog. But when compiling for thumb1, there are +// no instructions to access the floating point registers, so the +// compiler needs to add a call to the helper function _save_vfp_d8_d15_regs +// written in ARM to save the float registers. In the epilog, the compiler +// must also add a call to __restore_vfp_d8_d15_regs to restore those registers. 
+// + + .text + .syntax unified + +// +// Save registers d8-d15 onto stack +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__save_vfp_d8_d15_regs) + vstmdb sp!, {d8-d15} // push registers d8-d15 onto stack + bx lr // return to prolog +END_COMPILERRT_FUNCTION(__save_vfp_d8_d15_regs) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/softfloat-alias.list b/wasmrt/llvm-builtins/builtins/arm/softfloat-alias.list new file mode 100644 index 0000000..ab6ed21 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/softfloat-alias.list @@ -0,0 +1,21 @@ +# +# These are soft float functions which can be +# aliased to the *vfp functions on arm processors +# that support floating point instructions. +# +___adddf3vfp ___adddf3 +___addsf3vfp ___addsf3 +___divdf3vfp ___divdf3 +___divsf3vfp ___divsf3 +___extendsfdf2vfp ___extendsfdf2 +___fixdfsivfp ___fixdfsi +___fixsfsivfp ___fixsfsi +___floatsidfvfp ___floatsidf +___floatsisfvfp ___floatsisf +___muldf3vfp ___muldf3 +___mulsf3vfp ___mulsf3 +___subdf3vfp ___subdf3 +___subsf3vfp ___subsf3 +___truncdfsf2vfp ___truncdfsf2 +___floatunssidfvfp ___floatunsidf +___floatunssisfvfp ___floatunsisf diff --git a/wasmrt/llvm-builtins/builtins/arm/subdf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/subdf3vfp.S new file mode 100644 index 0000000..f4eaf9a --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/subdf3vfp.S @@ -0,0 +1,32 @@ +//===-- subdf3vfp.S - Implement subdf3vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern double __subdf3vfp(double a, double b); +// +// Returns difference between two double precision floating point numbers using +// the Darwin calling convention where double arguments are passed in GPR pairs +// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subdf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f64 d0, d0, d1 +#else + vmov d6, r0, r1 // move first param from r0/r1 pair into d6 + vmov d7, r2, r3 // move second param from r2/r3 pair into d7 + vsub.f64 d6, d6, d7 + vmov r0, r1, d6 // move result back to r0/r1 pair +#endif + bx lr +END_COMPILERRT_FUNCTION(__subdf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/subsf3vfp.S b/wasmrt/llvm-builtins/builtins/arm/subsf3vfp.S new file mode 100644 index 0000000..80e69f2 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/subsf3vfp.S @@ -0,0 +1,33 @@ +//===-- subsf3vfp.S - Implement subsf3vfp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __subsf3vfp(float a, float b); +// +// Returns the difference between two single precision floating point numbers +// using the Darwin calling convention where single arguments are passed +// like 32-bit ints. 
+// + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__subsf3vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vsub.f32 s0, s0, s1 +#else + vmov s14, r0 // move first param from r0 into float register + vmov s15, r1 // move second param from r1 into float register + vsub.f32 s14, s14, s15 + vmov r0, s14 // move result back to r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__subsf3vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/switch16.S b/wasmrt/llvm-builtins/builtins/arm/switch16.S new file mode 100644 index 0000000..a4b568d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/switch16.S @@ -0,0 +1,45 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed 2-byte sized elements which are 1/2 the distance +// from lr to the target label. 
+// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16) + ldrh ip, [lr, #-1] // get first 16-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #1 // compute address of element in table + add ip, lr, ip, lsl #1 // compute address of last element in table + ite lo + ldrshlo r0, [r0, #1] // load 16-bit element if r0 is in range + ldrshhs r0, [ip, #1] // load 16-bit element if r0 out of range + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch16) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/switch32.S b/wasmrt/llvm-builtins/builtins/arm/switch32.S new file mode 100644 index 0000000..f2a5af5 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/switch32.S @@ -0,0 +1,45 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. 
+ + + .text + .syntax unified + +// +// The table contains signed 4-byte sized elements which are the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32) + ldr ip, [lr, #-1] // get first 32-bit word in table + cmp r0, ip // compare with index + add r0, lr, r0, lsl #2 // compute address of element in table + add ip, lr, ip, lsl #2 // compute address of last element in table + ite lo + ldrlo r0, [r0, #3] // load 32-bit element if r0 is in range + ldrhs r0, [ip, #3] // load 32-bit element if r0 out of range + add ip, lr, r0 // compute label = lr + element + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch32) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/switch8.S b/wasmrt/llvm-builtins/builtins/arm/switch8.S new file mode 100644 index 0000000..0db875c --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/switch8.S @@ -0,0 +1,43 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. 
Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains signed byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // compare with index + ite lo + ldrsblo r0, [lr, r0] // get indexed byte out of table + ldrsbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switch8) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/switchu8.S b/wasmrt/llvm-builtins/builtins/arm/switchu8.S new file mode 100644 index 0000000..551abeb --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/switchu8.S @@ -0,0 +1,43 @@ +//===-- switch.S - Implement switch* --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// When compiling switch statements in thumb mode, the compiler +// can use these __switch* helper functions. The compiler emits a blx to +// the __switch* function followed by a table of displacements for each +// case statement. On entry, R0 is the index into the table. The __switch* +// function uses the return address in lr to find the start of the table. +// The first entry in the table is the count of the entries in the table. +// It then uses R0 to index into the table and get the displacement of the +// address to jump to. If R0 is greater than the size of the table, it jumps +// to the last entry in the table. 
Each displacement in the table is actually +// the distance from lr to the label, thus making the tables PIC. + + + .text + .syntax unified + +// +// The table contains unsigned byte sized elements which are 1/2 the distance +// from lr to the target label. +// + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8) + ldrb ip, [lr, #-1] // get first byte in table + cmp r0, ip // compare with index + ite lo + ldrblo r0, [lr, r0] // get indexed byte out of table + ldrbhs r0, [lr, ip] // if out of range, use last entry in table + add ip, lr, r0, lsl #1 // compute label = lr + element*2 + bx ip // jump to computed label +END_COMPILERRT_FUNCTION(__switchu8) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync-ops.h b/wasmrt/llvm-builtins/builtins/arm/sync-ops.h new file mode 100644 index 0000000..dca201d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync-ops.h @@ -0,0 +1,67 @@ +//===-- sync-ops.h - --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements outline macros for the __sync_fetch_and_* +// operations. Different instantiations will generate appropriate assembly for +// ARM and Thumb-2 versions of the functions. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +#if __ARM_ARCH >= 7 +#define DMB dmb +#elif __ARM_ARCH >= 6 +#define DMB mcr p15, #0, r0, c7, c10, #5 +#else +#error DMB is only supported on ARMv6+ +#endif + +#define SYNC_OP_4(op) \ + .p2align 2; \ + .syntax unified; \ + DEFINE_COMPILERRT_FUNCTION(__sync_fetch_and_##op) \ + DMB; \ + mov r12, r0; \ + LOCAL_LABEL(tryatomic_##op) : ldrex r0, [r12]; \ + op(r2, r0, r1); \ + strex r3, r2, [r12]; \ + cmp r3, #0; \ + bne LOCAL_LABEL(tryatomic_##op); \ + DMB; \ + bx lr + +#define SYNC_OP_8(op) \ + .p2align 2; \ + .syntax unified; \ + DEFINE_COMPILERRT_FUNCTION(__sync_fetch_and_##op) \ + push {r4, r5, r6, lr}; \ + DMB; \ + mov r12, r0; \ + LOCAL_LABEL(tryatomic_##op) : ldrexd r0, r1, [r12]; \ + op(r4, r5, r0, r1, r2, r3); \ + strexd r6, r4, r5, [r12]; \ + cmp r6, #0; \ + bne LOCAL_LABEL(tryatomic_##op); \ + DMB; \ + pop { r4, r5, r6, pc } + +#define MINMAX_4(rD, rN, rM, cmp_kind) \ + cmp rN, rM; \ + mov rD, rM; \ + it cmp_kind; \ + mov##cmp_kind rD, rN + +#define MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, cmp_kind) \ + cmp rN_LO, rM_LO; \ + sbcs rN_HI, rM_HI; \ + mov rD_LO, rM_LO; \ + mov rD_HI, rM_HI; \ + itt cmp_kind; \ + mov##cmp_kind rD_LO, rN_LO; \ + mov##cmp_kind rD_HI, rN_HI diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_add_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_add_4.S new file mode 100644 index 0000000..0d55975 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_add_4.S @@ -0,0 +1,22 @@ +//===-- sync_fetch_and_add_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_add_4 function for the ARM +// architecture. +// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +// "adds" is 2 bytes shorter than "add". +#define add_4(rD, rN, rM) add rD, rN, rM + +SYNC_OP_4(add_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_add_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_add_8.S new file mode 100644 index 0000000..18bdd87 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_add_8.S @@ -0,0 +1,25 @@ +//===-- sync_fetch_and_add_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_add_8 function for the ARM +// architecture. +// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define add_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + adds rD_LO, rN_LO, rM_LO ; \ + adc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(add_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_and_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_and_4.S new file mode 100644 index 0000000..3a76acc --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_and_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_and_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_and_4 function for the ARM +// architecture. +// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define and_4(rD, rN, rM) and rD, rN, rM + +SYNC_OP_4(and_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_and_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_and_8.S new file mode 100644 index 0000000..3716eff --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_and_8.S @@ -0,0 +1,25 @@ +//===-- sync_fetch_and_and_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_and_8 function for the ARM +// architecture. +// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define and_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + and rD_LO, rN_LO, rM_LO ; \ + and rD_HI, rN_HI, rM_HI + +SYNC_OP_8(and_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_max_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_max_4.S new file mode 100644 index 0000000..b9cee45 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_max_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_max_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_max_4 function for the ARM +// architecture. +// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define max_4(rD, rN, rM) MINMAX_4(rD, rN, rM, gt) + +SYNC_OP_4(max_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_max_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_max_8.S new file mode 100644 index 0000000..06115ab --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_max_8.S @@ -0,0 +1,23 @@ +//===-- sync_fetch_and_max_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_max_8 function for the ARM +// architecture. 
+// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define max_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, gt) + +SYNC_OP_8(max_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_min_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_min_4.S new file mode 100644 index 0000000..60d435a --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_min_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_min_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_min_4 function for the ARM +// architecture. +// +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define min_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lt) + +SYNC_OP_4(min_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_min_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_min_8.S new file mode 100644 index 0000000..4f3e299 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_min_8.S @@ -0,0 +1,23 @@ +//===-- sync_fetch_and_min_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_min_8 function for the ARM +// architecture. +// min_8 passes condition code lt to MINMAX_8; it is compiled out when __ARM_ARCH_PROFILE is 'M'. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define min_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lt) + +SYNC_OP_8(min_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_nand_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_nand_4.S new file mode 100644 index 0000000..5a04be0 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_nand_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_nand_4.S - -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_nand_4 function for the ARM +// architecture. 
+// NOTE(review): bic yields rN & ~rM, not ~(rN & rM) -- confirm this is the intended nand. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define nand_4(rD, rN, rM) bic rD, rN, rM + +SYNC_OP_4(nand_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_nand_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_nand_8.S new file mode 100644 index 0000000..425c944 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_nand_8.S @@ -0,0 +1,25 @@ +//===-- sync_fetch_and_nand_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_nand_8 function for the ARM +// architecture. +// nand_8 applies bic to the low and high words; it is compiled out when __ARM_ARCH_PROFILE is 'M'. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define nand_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + bic rD_LO, rN_LO, rM_LO ; \ + bic rD_HI, rN_HI, rM_HI + +SYNC_OP_8(nand_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_or_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_or_4.S new file mode 100644 index 0000000..f44751b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_or_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_or_4.S - -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_or_4 function for the ARM +// architecture. 
+// or_4 is a single orr instruction: rD = rN | rM. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define or_4(rD, rN, rM) orr rD, rN, rM + +SYNC_OP_4(or_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_or_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_or_8.S new file mode 100644 index 0000000..4f18dcf --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_or_8.S @@ -0,0 +1,25 @@ +//===-- sync_fetch_and_or_8.S - -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_or_8 function for the ARM +// architecture. +// or_8 applies orr to the low and high words; it is compiled out when __ARM_ARCH_PROFILE is 'M'. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define or_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + orr rD_LO, rN_LO, rM_LO ; \ + orr rD_HI, rN_HI, rM_HI + +SYNC_OP_8(or_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_sub_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_sub_4.S new file mode 100644 index 0000000..999d48c --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_sub_4.S @@ -0,0 +1,22 @@ +//===-- sync_fetch_and_sub_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_sub_4 function for the ARM +// architecture. +// NOTE(review): the remark below favours subs, yet sub_4 is defined with plain sub -- confirm. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +// "subs" is 2 bytes shorter than "sub". 
+#define sub_4(rD, rN, rM) sub rD, rN, rM + +SYNC_OP_4(sub_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_sub_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_sub_8.S new file mode 100644 index 0000000..25a4a10 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_sub_8.S @@ -0,0 +1,25 @@ +//===-- sync_fetch_and_sub_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_sub_8 function for the ARM +// architecture. 
+// sub_8 is subs on the low words, then sbc on the high words so the borrow propagates. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define sub_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + subs rD_LO, rN_LO, rM_LO ; \ + sbc rD_HI, rN_HI, rM_HI + +SYNC_OP_8(sub_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umax_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umax_4.S new file mode 100644 index 0000000..a7b233b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umax_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_umax_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_umax_4 function for the ARM +// architecture. 
+// umax_4 expands to MINMAX_4 with the ARM condition code hi (unsigned higher). +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define umax_4(rD, rN, rM) MINMAX_4(rD, rN, rM, hi) + +SYNC_OP_4(umax_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umax_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umax_8.S new file mode 100644 index 0000000..aa5213f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umax_8.S @@ -0,0 +1,23 @@ +//===-- sync_fetch_and_umax_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_umax_8 function for the ARM +// architecture. +// umax_8 passes condition code hi to MINMAX_8; it is compiled out when __ARM_ARCH_PROFILE is 'M'. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umax_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, hi) + +SYNC_OP_8(umax_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umin_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umin_4.S new file mode 100644 index 0000000..c7a9c89 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umin_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_umin_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_umin_4 function for the ARM +// architecture. +// umin_4 expands to MINMAX_4 with the ARM condition code lo (unsigned lower). +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define umin_4(rD, rN, rM) MINMAX_4(rD, rN, rM, lo) + +SYNC_OP_4(umin_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umin_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umin_8.S new file mode 100644 index 0000000..8b40541 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_umin_8.S @@ -0,0 +1,23 @@ +//===-- sync_fetch_and_umin_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_umin_8 function for the ARM +// architecture. 
+// umin_8 passes condition code lo to MINMAX_8; it is compiled out when __ARM_ARCH_PROFILE is 'M'. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define umin_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) MINMAX_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI, lo) + +SYNC_OP_8(umin_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_xor_4.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_xor_4.S new file mode 100644 index 0000000..f509191 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_xor_4.S @@ -0,0 +1,21 @@ +//===-- sync_fetch_and_xor_4.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_xor_4 function for the ARM +// architecture. +// xor_4 is a single eor instruction: rD = rN ^ rM. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#define xor_4(rD, rN, rM) eor rD, rN, rM + +SYNC_OP_4(xor_4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_xor_8.S b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_xor_8.S new file mode 100644 index 0000000..7436eb1 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_fetch_and_xor_8.S @@ -0,0 +1,25 @@ +//===-- sync_fetch_and_xor_8.S - ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __sync_fetch_and_xor_8 function for the ARM +// architecture. 
+// xor_8 applies eor to the low and high words; it is compiled out when __ARM_ARCH_PROFILE is 'M'. +//===----------------------------------------------------------------------===// + +#include "sync-ops.h" + +#if __ARM_ARCH_PROFILE != 'M' +#define xor_8(rD_LO, rD_HI, rN_LO, rN_HI, rM_LO, rM_HI) \ + eor rD_LO, rN_LO, rM_LO ; \ + eor rD_HI, rN_HI, rM_HI + +SYNC_OP_8(xor_8) +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/sync_synchronize.S b/wasmrt/llvm-builtins/builtins/arm/sync_synchronize.S new file mode 100644 index 0000000..dd06e71 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/sync_synchronize.S @@ -0,0 +1,35 @@ +//===-- sync_synchronize - Implement memory barrier * ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// When compiling a use of the gcc built-in __sync_synchronize() in thumb1 mode +// the compiler may emit a call to __sync_synchronize. +// On Darwin the implementation calls an OS supplied function named +// OSMemoryBarrier. When __APPLE__ is not set this file defines no function. + + .text + .syntax unified + +#if __APPLE__ + + .p2align 2 +DEFINE_COMPILERRT_PRIVATE_FUNCTION(__sync_synchronize) + stmfd sp!, {r7, lr} + add r7, sp, #0 + bl _OSMemoryBarrier + ldmfd sp!, {r7, pc} +END_COMPILERRT_FUNCTION(__sync_synchronize) + + // tell linker it can break up file at label boundaries + .subsections_via_symbols + +#endif + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/truncdfsf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/truncdfsf2vfp.S new file mode 100644 index 0000000..e1c1712 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/truncdfsf2vfp.S @@ -0,0 +1,32 @@ +//===-- truncdfsf2vfp.S - Implement truncdfsf2vfp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern float __truncdfsf2vfp(double a); +// +// Converts double precision float to single precision result. +// Uses Darwin calling convention where a double precision parameter is +// passed in a R0/R1 pair and a single precision result is returned in R0. 
+// With COMPILER_RT_ARMHF_TARGET the argument is taken from d0 and the result left in s0. + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__truncdfsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcvt.f32.f64 s0, d0 +#else + vmov d7, r0, r1 // load double from r0/r1 pair + vcvt.f32.f64 s15, d7 // convert double to single (truncate precision) + vmov r0, s15 // return result in r0 +#endif + bx lr +END_COMPILERRT_FUNCTION(__truncdfsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/udivmodsi4.S b/wasmrt/llvm-builtins/builtins/arm/udivmodsi4.S new file mode 100644 index 0000000..0f40575 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/udivmodsi4.S @@ -0,0 +1,178 @@ +//===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __udivmodsi4 (32-bit unsigned integer divide and +// modulus) function for the ARM 32-bit architecture. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +@ unsigned int __udivmodsi4(unsigned int dividend, unsigned int divisor, +@ unsigned int *remainder) +@ Calculate the quotient and remainder of the (unsigned) division. The return +@ value is the quotient, the remainder is stored through the pointer in r2. + + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__udivmodsi4) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + mov r3, r0 + udiv r0, r3, r1 + mls r1, r0, r1, r3 + str r1, [r2] + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + beq LOCAL_LABEL(divby1) + cmp r0, r1 + bcc LOCAL_LABEL(quotient0) + + // Implement division using binary long division algorithm. + // + // r0 is the numerator, r1 the denominator. 
+ // + // The code before JMP computes the correct shift I, so that + // r0 and (r1 << I) have the highest bit set in the same position. + // At the time of JMP, ip := .Ldiv0block - 12 * I. + // This depends on the fixed instruction size of block. + // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + // The CLZ path below forms that offset by subtracting 4*I and 8*I (and 2*I more for Thumb-2). + // block(shift) implements the test-and-update-quotient core. + // It assumes (r0 << shift) can be computed without overflow and + // that (r0 << shift) < 2 * r1. The quotient is stored in r3. + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. + sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif + str r4, [sp, #-8]! + + mov r4, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r4, #16 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(16 * 12) + + lsr r3, r4, #8 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(8 * 12) + + lsr r3, r4, #4 + cmp r3, r1 + movhs r4, r3 + subhs ip, #(4 * 12) + + lsr r3, r4, #2 + cmp r3, r1 + movhs r4, r3 + subhs ip, ip, #(2 * 12) + + // Last block, no need to update r3 or r4. + cmp r1, r4, lsr #1 + subls ip, ip, #(1 * 12) + + ldr r4, [sp], #8 // restore r4, we are done with it. 
+ mov r3, #0 + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + str r0, [r2] + mov r0, r3 + JMP(lr) + +LOCAL_LABEL(quotient0): + str r0, [r2] + mov r0, #0 + JMP(lr) + +LOCAL_LABEL(divby1): + mov r3, #0 + str r3, [r2] + JMP(lr) +#endif // __ARM_ARCH_EXT_IDIV__ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__udivmodsi4) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/udivsi3.S b/wasmrt/llvm-builtins/builtins/arm/udivsi3.S new file mode 100644 index 0000000..0567ab4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/udivsi3.S @@ -0,0 +1,262 @@ +//===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __udivsi3 (32-bit unsigned integer divide) +// function for the ARM 32-bit architecture. 
+// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + +DEFINE_CODE_STATE + + .p2align 2 +DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) + +@ unsigned int __udivsi3(unsigned int dividend, unsigned int divisor) +@ Calculate and return the quotient of the (unsigned) division. + +DEFINE_COMPILERRT_FUNCTION(__udivsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r0, r0, r1 + bx lr + +LOCAL_LABEL(divby0): + // Use movs for compatibility with v8-m.base. + movs r0, #0 +# ifdef __ARM_EABI__ + b __aeabi_idiv0 +# else + JMP(lr) +# endif + +#else // ! __ARM_ARCH_EXT_IDIV__ + cmp r1, #1 + bcc LOCAL_LABEL(divby0) +#if defined(USE_THUMB_1) + bne LOCAL_LABEL(num_neq_denom) + JMP(lr) +LOCAL_LABEL(num_neq_denom): +#else + IT(eq) + JMPc(lr, eq) +#endif + cmp r0, r1 +#if defined(USE_THUMB_1) + bhs LOCAL_LABEL(num_ge_denom) + movs r0, #0 + JMP(lr) +LOCAL_LABEL(num_ge_denom): +#else + ITT(cc) + movcc r0, #0 + JMPc(lr, cc) +#endif + + // Implement division using binary long division algorithm. + // + // r0 is the numerator, r1 the denominator. + // + // The code before JMP computes the correct shift I, so that + // r0 and (r1 << I) have the highest bit set in the same position. + // At the time of JMP, ip := .Ldiv0block - 12 * I. + // This depends on the fixed instruction size of block. + // For ARM mode, this is 12 Bytes, for THUMB mode 14 Bytes. + // (The non-CLZ path uses BLOCK_SIZE instead: 10 under USE_THUMB_1, otherwise 12.) + // block(shift) implements the test-and-update-quotient core. + // It assumes (r0 << shift) can be computed without overflow and + // that (r0 << shift) < 2 * r1. The quotient is stored in r3. + +# if defined(__ARM_FEATURE_CLZ) + clz ip, r0 + clz r3, r1 + // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. 
+ sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #2 + sub ip, ip, r3, lsl #3 + mov r3, #0 + bx ip +# else // No CLZ Feature +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif +# if defined(USE_THUMB_1) +# define BLOCK_SIZE 10 +# else +# define BLOCK_SIZE 12 +# endif + + mov r2, r0 +# if defined(USE_THUMB_1) + mov ip, r0 + adr r0, LOCAL_LABEL(div0block) + adds r0, #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + lsrs r3, r2, #16 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_16) + movs r2, r3 + subs r0, r0, #(16 * BLOCK_SIZE) +LOCAL_LABEL(skip_16): +# else + movhs r2, r3 + subhs ip, ip, #(16 * BLOCK_SIZE) +# endif + + lsrs r3, r2, #8 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_8) + movs r2, r3 + subs r0, r0, #(8 * BLOCK_SIZE) +LOCAL_LABEL(skip_8): +# else + movhs r2, r3 + subhs ip, ip, #(8 * BLOCK_SIZE) +# endif + + lsrs r3, r2, #4 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_4) + movs r2, r3 + subs r0, r0, #(4 * BLOCK_SIZE) +LOCAL_LABEL(skip_4): +# else + movhs r2, r3 + subhs ip, #(4 * BLOCK_SIZE) +# endif + + lsrs r3, r2, #2 + cmp r3, r1 +# if defined(USE_THUMB_1) + blo LOCAL_LABEL(skip_2) + movs r2, r3 + subs r0, r0, #(2 * BLOCK_SIZE) +LOCAL_LABEL(skip_2): +# else + movhs r2, r3 + subhs ip, ip, #(2 * BLOCK_SIZE) +# endif + + // Last block, no need to update r2 or r3. 
+# if defined(USE_THUMB_1) + lsrs r3, r2, #1 + cmp r3, r1 + blo LOCAL_LABEL(skip_1) + subs r0, r0, #(1 * BLOCK_SIZE) +LOCAL_LABEL(skip_1): + movs r2, r0 + mov r0, ip + movs r3, #0 + JMP (r2) + +# else + cmp r1, r2, lsr #1 + subls ip, ip, #(1 * BLOCK_SIZE) + + movs r3, #0 + + JMP(ip) +# endif +# endif // __ARM_FEATURE_CLZ + + +#define IMM # + // due to the range limit of branch in Thumb1, we have to place the + // block closer +LOCAL_LABEL(divby0): + movs r0, #0 +# if defined(__ARM_EABI__) + push {r7, lr} + bl __aeabi_idiv0 // due to relocation limit, can't use b. + pop {r7, pc} +# else + JMP(lr) +# endif + + +#if defined(USE_THUMB_1) +#define block(shift) \ + lsls r2, r1, IMM shift; \ + cmp r0, r2; \ + blo LOCAL_LABEL(block_skip_##shift); \ + subs r0, r0, r2; \ + LOCAL_LABEL(block_skip_##shift) :; \ + adcs r3, r3 // same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. + + // TODO: if current location counter is not word aligned, we don't + // need the .p2align and nop + // Label div0block must be word-aligned. 
First align block 31 + .p2align 2 + nop // Padding to align div0block as 31 blocks = 310 bytes + +#else +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + ITT(hs); \ + WIDE(addhs) r3, r3, IMM (1 << shift); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift +#endif + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + + mov r0, r3 + JMP(lr) +#endif // __ARM_ARCH_EXT_IDIV__ + +END_COMPILERRT_FUNCTION(__udivsi3) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/umodsi3.S b/wasmrt/llvm-builtins/builtins/arm/umodsi3.S new file mode 100644 index 0000000..5ab78de --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/umodsi3.S @@ -0,0 +1,156 @@ +//===-- umodsi3.S - 32-bit unsigned integer modulus -----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the __umodsi3 (32-bit unsigned integer modulus) +// function for the ARM 32-bit architecture. +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + + .syntax unified + .text + DEFINE_CODE_STATE + +@ unsigned int __umodsi3(unsigned int dividend, unsigned int divisor) +@ Calculate and return the remainder of the (unsigned) division. 
+ + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__umodsi3) +#if __ARM_ARCH_EXT_IDIV__ + tst r1, r1 + beq LOCAL_LABEL(divby0) + udiv r2, r0, r1 + mls r0, r2, r1, r0 + bx lr +#else + cmp r1, #1 + bcc LOCAL_LABEL(divby0) + ITT(eq) + moveq r0, #0 + JMPc(lr, eq) + cmp r0, r1 + IT(cc) + JMPc(lr, cc) + + // Implement division using binary long division algorithm. + // + // r0 is the numerator, r1 the denominator. + // + // The code before JMP computes the correct shift I, so that + // r0 and (r1 << I) have the highest bit set in the same position. + // At the time of JMP, ip := .Ldiv0block - 8 * I. + // This depends on the fixed instruction size of block. + // For ARM mode, this is 8 Bytes, for THUMB mode 10 Bytes. + // + // block(shift) implements the test-and-subtract core. + // It assumes (r0 << shift) can be computed without overflow and + // that (r0 << shift) < 2 * r1. Only the remainder is kept, in r0. + +# ifdef __ARM_FEATURE_CLZ + clz ip, r0 + clz r3, r1 + // r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. + sub r3, r3, ip +# if defined(USE_THUMB_2) + adr ip, LOCAL_LABEL(div0block) + 1 + sub ip, ip, r3, lsl #1 +# else + adr ip, LOCAL_LABEL(div0block) +# endif + sub ip, ip, r3, lsl #3 + bx ip +# else +# if defined(USE_THUMB_2) +# error THUMB mode requires CLZ or UDIV +# endif + mov r2, r0 + adr ip, LOCAL_LABEL(div0block) + + lsr r3, r2, #16 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(16 * 8) + + lsr r3, r2, #8 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(8 * 8) + + lsr r3, r2, #4 + cmp r3, r1 + movhs r2, r3 + subhs ip, #(4 * 8) + + lsr r3, r2, #2 + cmp r3, r1 + movhs r2, r3 + subhs ip, ip, #(2 * 8) + + // Last block, no need to update r2 or r3. 
+ cmp r1, r2, lsr #1 + subls ip, ip, #(1 * 8) + + JMP(ip) +# endif + +#define IMM # + +#define block(shift) \ + cmp r0, r1, lsl IMM shift; \ + IT(hs); \ + WIDE(subhs) r0, r0, r1, lsl IMM shift + + block(31) + block(30) + block(29) + block(28) + block(27) + block(26) + block(25) + block(24) + block(23) + block(22) + block(21) + block(20) + block(19) + block(18) + block(17) + block(16) + block(15) + block(14) + block(13) + block(12) + block(11) + block(10) + block(9) + block(8) + block(7) + block(6) + block(5) + block(4) + block(3) + block(2) + block(1) +LOCAL_LABEL(div0block): + block(0) + JMP(lr) +#endif // __ARM_ARCH_EXT_IDIV__ + +LOCAL_LABEL(divby0): + mov r0, #0 +#ifdef __ARM_EABI__ + b __aeabi_idiv0 +#else + JMP(lr) +#endif + +END_COMPILERRT_FUNCTION(__umodsi3) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/unorddf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/unorddf2vfp.S new file mode 100644 index 0000000..ea36a1c --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/unorddf2vfp.S @@ -0,0 +1,36 @@ +//===-- unorddf2vfp.S - Implement unorddf2vfp ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unorddf2vfp(double a, double b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where double precision arguments are passed +// like in GPR pairs. 
+// With COMPILER_RT_ARMHF_TARGET the operands are compared directly in d0 and d1. + .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f64 d0, d1 +#else + vmov d6, r0, r1 // load r0/r1 pair in double register + vmov d7, r2, r3 // load r2/r3 pair in double register + vcmp.f64 d6, d7 +#endif + vmrs apsr_nzcv, fpscr // copy the FP comparison flags into the APSR + ITE(vs) + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 // otherwise the result is 0 + bx lr +END_COMPILERRT_FUNCTION(__unorddf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/arm/unordsf2vfp.S b/wasmrt/llvm-builtins/builtins/arm/unordsf2vfp.S new file mode 100644 index 0000000..7311297 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/arm/unordsf2vfp.S @@ -0,0 +1,36 @@ +//===-- unordsf2vfp.S - Implement unordsf2vfp -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../assembly.h" + +// +// extern int __unordsf2vfp(float a, float b); +// +// Returns one iff a or b is NaN +// Uses Darwin calling convention where single precision arguments are passed +// like 32-bit ints +// With COMPILER_RT_ARMHF_TARGET the operands are compared directly in s0 and s1. 
+ .syntax unified + .p2align 2 +DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) +#if defined(COMPILER_RT_ARMHF_TARGET) + vcmp.f32 s0, s1 +#else + vmov s14, r0 // move from GPR 0 to float register + vmov s15, r1 // move from GPR 1 to float register + vcmp.f32 s14, s15 +#endif + vmrs apsr_nzcv, fpscr // copy the FP comparison flags into the APSR + ITE(vs) + movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) + movvc r0, #0 // otherwise the result is 0 + bx lr +END_COMPILERRT_FUNCTION(__unordsf2vfp) + +NO_EXEC_STACK_DIRECTIVE + diff --git a/wasmrt/llvm-builtins/builtins/ashldi3.c b/wasmrt/llvm-builtins/builtins/ashldi3.c new file mode 100644 index 0000000..04f2222 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ashldi3.c @@ -0,0 +1,38 @@ +// 
====-- ashldi3.c - Implement __ashldi3 ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashldi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a << b + +// Precondition: 0 <= b < bits_in_dword + +COMPILER_RT_ABI di_int __ashldi3(di_int a, int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_word - b)); + } + return result.all; +} + +#if defined(__ARM_EABI__) +COMPILER_RT_ALIAS(__ashldi3, __aeabi_llsl) +#endif diff --git a/wasmrt/llvm-builtins/builtins/ashlti3.c b/wasmrt/llvm-builtins/builtins/ashlti3.c new file mode 100644 index 0000000..99a133f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ashlti3.c @@ -0,0 +1,38 @@ +//===-- ashlti3.c - Implement __ashlti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashlti3 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: a << b + +// Precondition: 0 <= b < bits_in_tword + +COMPILER_RT_ABI ti_int __ashlti3(ti_int a, int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ { + result.s.low = 0; + result.s.high = input.s.low << (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ { + if (b == 0) + return a; + result.s.low = input.s.low << b; + result.s.high = (input.s.high << b) | (input.s.low >> (bits_in_dword - b)); + } + return result.all; +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/ashrdi3.c b/wasmrt/llvm-builtins/builtins/ashrdi3.c new file mode 100644 index 0000000..934a5c4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ashrdi3.c @@ -0,0 +1,39 @@ +//===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashrdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: arithmetic a >> b + +// Precondition: 0 <= b < bits_in_dword + +COMPILER_RT_ABI di_int __ashrdi3(di_int a, int b) { + const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT); + dwords input; + dwords result; + input.all = a; + if (b & bits_in_word) /* bits_in_word <= b < bits_in_dword */ { + // result.s.high = input.s.high < 0 ? 
-1 : 0 + result.s.high = input.s.high >> (bits_in_word - 1); + result.s.low = input.s.high >> (b - bits_in_word); + } else /* 0 <= b < bits_in_word */ { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_word - b)) | (input.s.low >> b); + } + return result.all; +} + +#if defined(__ARM_EABI__) +COMPILER_RT_ALIAS(__ashrdi3, __aeabi_lasr) +#endif diff --git a/wasmrt/llvm-builtins/builtins/ashrti3.c b/wasmrt/llvm-builtins/builtins/ashrti3.c new file mode 100644 index 0000000..b306051 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ashrti3.c @@ -0,0 +1,39 @@ +//===-- ashrti3.c - Implement __ashrti3 -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ashrti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: arithmetic a >> b + +// Precondition: 0 <= b < bits_in_tword + +COMPILER_RT_ABI ti_int __ashrti3(ti_int a, int b) { + const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT); + twords input; + twords result; + input.all = a; + if (b & bits_in_dword) /* bits_in_dword <= b < bits_in_tword */ { + // result.s.high = input.s.high < 0 ? 
-1 : 0 + result.s.high = input.s.high >> (bits_in_dword - 1); + result.s.low = input.s.high >> (b - bits_in_dword); + } else /* 0 <= b < bits_in_dword */ { + if (b == 0) + return a; + result.s.high = input.s.high >> b; + result.s.low = (input.s.high << (bits_in_dword - b)) | (input.s.low >> b); + } + return result.all; +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/assembly.h b/wasmrt/llvm-builtins/builtins/assembly.h new file mode 100644 index 0000000..169d496 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/assembly.h @@ -0,0 +1,292 @@ +//===-- assembly.h - compiler-rt assembler support macros -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines macros for use in compiler-rt assembler source. +// This file is not part of the interface of this library. 
+// +//===----------------------------------------------------------------------===// + +#ifndef COMPILERRT_ASSEMBLY_H +#define COMPILERRT_ASSEMBLY_H + +#if defined(__linux__) && defined(__CET__) +#if __has_include(<cet.h>) +#include <cet.h> +#endif +#endif + +#if defined(__APPLE__) && defined(__aarch64__) +#define SEPARATOR %% +#else +#define SEPARATOR ; +#endif + +#if defined(__APPLE__) +#define HIDDEN(name) .private_extern name +#define LOCAL_LABEL(name) L_##name +// tell linker it can break up file at label boundaries +#define FILE_LEVEL_DIRECTIVE .subsections_via_symbols +#define SYMBOL_IS_FUNC(name) +#define CONST_SECTION .const + +#define NO_EXEC_STACK_DIRECTIVE + +#elif defined(__ELF__) + +#define HIDDEN(name) .hidden name +#define LOCAL_LABEL(name) .L_##name +#define FILE_LEVEL_DIRECTIVE +#if defined(__arm__) || defined(__aarch64__) +#define SYMBOL_IS_FUNC(name) .type name,%function +#else +#define SYMBOL_IS_FUNC(name) .type name,@function +#endif +#define CONST_SECTION .section .rodata + +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ + defined(__linux__) +#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits +#else +#define NO_EXEC_STACK_DIRECTIVE +#endif + +#else // !__APPLE__ && !__ELF__ + +#define HIDDEN(name) +#define LOCAL_LABEL(name) .L ## name +#define FILE_LEVEL_DIRECTIVE +#define SYMBOL_IS_FUNC(name) \ + .def name SEPARATOR \ + .scl 2 SEPARATOR \ + .type 32 SEPARATOR \ + .endef +#define CONST_SECTION .section .rdata,"rd" + +#define NO_EXEC_STACK_DIRECTIVE + +#endif + +#if defined(__arm__) || defined(__aarch64__) +#define FUNC_ALIGN \ + .text SEPARATOR \ + .balign 16 SEPARATOR +#else +#define FUNC_ALIGN +#endif + +// BTI and PAC gnu property note +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 + +#if defined(__ARM_FEATURE_BTI_DEFAULT) +#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI 
+#else +#define BTI_FLAG 0 +#endif + +#if __ARM_FEATURE_PAC_DEFAULT & 3 +#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +#else +#define PAC_FLAG 0 +#endif + +#define GNU_PROPERTY(type, value) \ + .pushsection .note.gnu.property, "a" SEPARATOR \ + .p2align 3 SEPARATOR \ + .word 4 SEPARATOR \ + .word 16 SEPARATOR \ + .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR \ + .asciz "GNU" SEPARATOR \ + .word type SEPARATOR \ + .word 4 SEPARATOR \ + .word value SEPARATOR \ + .word 0 SEPARATOR \ + .popsection + +#if BTI_FLAG != 0 +#define BTI_C hint #34 +#define BTI_J hint #36 +#else +#define BTI_C +#define BTI_J +#endif + +#if (BTI_FLAG | PAC_FLAG) != 0 +#define GNU_PROPERTY_BTI_PAC \ + GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) +#else +#define GNU_PROPERTY_BTI_PAC +#endif + +#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM) +#define CFI_START .cfi_startproc +#define CFI_END .cfi_endproc +#else +#define CFI_START +#define CFI_END +#endif + +#if defined(__arm__) + +// Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros: +// - for '-mthumb -march=armv6' compiler defines '__thumb__' +// - for '-mthumb -march=armv7' compiler defines '__thumb__' and '__thumb2__' +#if defined(__thumb2__) || defined(__thumb__) +#define DEFINE_CODE_STATE .thumb SEPARATOR +#define DECLARE_FUNC_ENCODING .thumb_func SEPARATOR +#if defined(__thumb2__) +#define USE_THUMB_2 +#define IT(cond) it cond +#define ITT(cond) itt cond +#define ITE(cond) ite cond +#else +#define USE_THUMB_1 +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif // defined(__thumb2__) +#else // !defined(__thumb2__) && !defined(__thumb__) +#define DEFINE_CODE_STATE .arm SEPARATOR +#define DECLARE_FUNC_ENCODING +#define IT(cond) +#define ITT(cond) +#define ITE(cond) +#endif + +#if defined(USE_THUMB_1) && defined(USE_THUMB_2) +#error "USE_THUMB_1 and USE_THUMB_2 can't be defined together." 
+#endif + +#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5 +#define ARM_HAS_BX +#endif +#if !defined(__ARM_FEATURE_CLZ) && !defined(USE_THUMB_1) && \ + (__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__))) +#define __ARM_FEATURE_CLZ +#endif + +#ifdef ARM_HAS_BX +#define JMP(r) bx r +#define JMPc(r, c) bx##c r +#else +#define JMP(r) mov pc, r +#define JMPc(r, c) mov##c pc, r +#endif + +// pop {pc} can't switch Thumb mode on ARMv4T +#if __ARM_ARCH >= 5 +#define POP_PC() pop {pc} +#else +#define POP_PC() \ + pop {ip}; \ + JMP(ip) +#endif + +#if defined(USE_THUMB_2) +#define WIDE(op) op.w +#else +#define WIDE(op) op +#endif +#else // !defined(__arm__) +#define DECLARE_FUNC_ENCODING +#define DEFINE_CODE_STATE +#endif + +#define GLUE2_(a, b) a##b +#define GLUE(a, b) GLUE2_(a, b) +#define GLUE2(a, b) GLUE2_(a, b) +#define GLUE3_(a, b, c) a##b##c +#define GLUE3(a, b, c) GLUE3_(a, b, c) +#define GLUE4_(a, b, c, d) a##b##c##d +#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d) + +#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) + +#ifdef VISIBILITY_HIDDEN +#define DECLARE_SYMBOL_VISIBILITY(name) \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR +#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) \ + HIDDEN(name) SEPARATOR +#else +#define DECLARE_SYMBOL_VISIBILITY(name) +#define DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) +#endif + +#define DEFINE_COMPILERRT_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_THUMB_FUNCTION(name) \ + DEFINE_CODE_STATE \ + FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .thumb_func SEPARATOR \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION(name) \ + DEFINE_CODE_STATE \ + 
FILE_LEVEL_DIRECTIVE SEPARATOR \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + HIDDEN(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + SYMBOL_NAME(name): + +#define DEFINE_COMPILERRT_PRIVATE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + HIDDEN(name) SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: + +#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + FUNC_ALIGN \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY_UNMANGLED(name) SEPARATOR \ + CFI_START SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: SEPARATOR BTI_C + +#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + .set SYMBOL_NAME(name), SYMBOL_NAME(target) SEPARATOR + +#if defined(__ARM_EABI__) +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) \ + DEFINE_COMPILERRT_FUNCTION_ALIAS(aeabi_name, name) +#else +#define DEFINE_AEABI_FUNCTION_ALIAS(aeabi_name, name) +#endif + +#ifdef __ELF__ +#define END_COMPILERRT_FUNCTION(name) \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#define END_COMPILERRT_OUTLINE_FUNCTION(name) \ + CFI_END SEPARATOR \ + .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#else +#define END_COMPILERRT_FUNCTION(name) +#define END_COMPILERRT_OUTLINE_FUNCTION(name) \ + CFI_END +#endif + +#endif // COMPILERRT_ASSEMBLY_H diff --git a/wasmrt/llvm-builtins/builtins/atomic.c b/wasmrt/llvm-builtins/builtins/atomic.c new file mode 100644 index 0000000..852bb20 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic.c @@ -0,0 +1,373 @@ +//===-- atomic.c - Implement support functions for atomic operations.------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// atomic.c defines a set of functions for performing atomic accesses on +// arbitrary-sized memory locations. This design uses locks that should +// be fast in the uncontended case, for two reasons: +// +// 1) This code must work with C programs that do not link to anything +// (including pthreads) and so it should not depend on any pthread +// functions. +// 2) Atomic operations, rather than explicit mutexes, are most commonly used +// on code where contended operations are rare. +// +// To avoid needing a per-object lock, this code allocates an array of +// locks and hashes the object pointers to find the one that it should use. +// For operations that must be atomic on two locations, the lower lock is +// always acquired first, to avoid deadlock. +// +//===----------------------------------------------------------------------===// + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include "assembly.h" + +// We use __builtin_mem* here to avoid dependencies on libc-provided headers. +#define memcpy __builtin_memcpy +#define memcmp __builtin_memcmp + +// Clang objects if you redefine a builtin. This little hack allows us to +// define a function with the same name as an intrinsic. +#pragma redefine_extname __atomic_load_c SYMBOL_NAME(__atomic_load) +#pragma redefine_extname __atomic_store_c SYMBOL_NAME(__atomic_store) +#pragma redefine_extname __atomic_exchange_c SYMBOL_NAME(__atomic_exchange) +#pragma redefine_extname __atomic_compare_exchange_c SYMBOL_NAME( \ + __atomic_compare_exchange) +#pragma redefine_extname __atomic_is_lock_free_c SYMBOL_NAME( \ + __atomic_is_lock_free) + +/// Number of locks. This allocates one page on 32-bit platforms, two on +/// 64-bit. This can be specified externally if a different trade between +/// memory usage and contention probability is required for a given platform. 
+#ifndef SPINLOCK_COUNT +#define SPINLOCK_COUNT (1 << 10) +#endif +static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; + +//////////////////////////////////////////////////////////////////////////////// +// Platform-specific lock implementation. Falls back to spinlocks if none is +// defined. Each platform should define the Lock type, and corresponding +// lock() and unlock() functions. +//////////////////////////////////////////////////////////////////////////////// +#if defined(__FreeBSD__) || defined(__DragonFly__) +#include <errno.h> +// clang-format off +#include <sys/types.h> +#include <machine/atomic.h> +#include <sys/umtx.h> +// clang-format on +typedef struct _usem Lock; +__inline static void unlock(Lock *l) { + __c11_atomic_store((_Atomic(uint32_t) *)&l->_count, 1, __ATOMIC_RELEASE); + __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); + if (l->_has_waiters) + _umtx_op(l, UMTX_OP_SEM_WAKE, 1, 0, 0); +} +__inline static void lock(Lock *l) { + uint32_t old = 1; + while (!__c11_atomic_compare_exchange_weak((_Atomic(uint32_t) *)&l->_count, + &old, 0, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) { + _umtx_op(l, UMTX_OP_SEM_WAIT, 0, 0, 0); + old = 1; + } +} +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT] = {[0 ... SPINLOCK_COUNT - 1] = {0, 1, 0}}; + +#elif defined(__APPLE__) +#include <libkern/OSAtomic.h> +typedef OSSpinLock Lock; +__inline static void unlock(Lock *l) { OSSpinLockUnlock(l); } +/// Locks a lock. In the current implementation, this is potentially +/// unbounded in the contended case. +__inline static void lock(Lock *l) { OSSpinLockLock(l); } +static Lock locks[SPINLOCK_COUNT]; // initialized to OS_SPINLOCK_INIT which is 0 + +#else +_Static_assert(__atomic_always_lock_free(sizeof(uintptr_t), 0), + "Implementation assumes lock-free pointer-size cmpxchg"); +typedef _Atomic(uintptr_t) Lock; +/// Unlock a lock. This is a release operation. +__inline static void unlock(Lock *l) { + __c11_atomic_store(l, 0, __ATOMIC_RELEASE); +} +/// Locks a lock. 
In the current implementation, this is potentially +/// unbounded in the contended case. +__inline static void lock(Lock *l) { + uintptr_t old = 0; + while (!__c11_atomic_compare_exchange_weak(l, &old, 1, __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) + old = 0; +} +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT]; +#endif + +/// Returns a lock to use for a given pointer. +static __inline Lock *lock_for_pointer(void *ptr) { + intptr_t hash = (intptr_t)ptr; + // Disregard the lowest 4 bits. We want all values that may be part of the + // same memory operation to hash to the same value and therefore use the same + // lock. + hash >>= 4; + // Use the next bits as the basis for the hash + intptr_t low = hash & SPINLOCK_MASK; + // Now use the high(er) set of bits to perturb the hash, so that we don't + // get collisions from atomic fields in a single object + hash >>= 16; + hash ^= low; + // Return a pointer to the word to use + return locks + (hash & SPINLOCK_MASK); +} + +/// Macros for determining whether a size is lock free. +#define ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(size, p) \ + (__atomic_always_lock_free(size, p) || \ + (__atomic_always_lock_free(size, 0) && ((uintptr_t)p % size) == 0)) +#define IS_LOCK_FREE_1(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(1, p) +#define IS_LOCK_FREE_2(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(2, p) +#define IS_LOCK_FREE_4(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(4, p) +#define IS_LOCK_FREE_8(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(8, p) +#define IS_LOCK_FREE_16(p) ATOMIC_ALWAYS_LOCK_FREE_OR_ALIGNED_LOCK_FREE(16, p) + +/// Macro that calls the compiler-generated lock-free versions of functions +/// when they exist. 
+#define TRY_LOCK_FREE_CASE(n, type, ptr) \ + case n: \ + if (IS_LOCK_FREE_##n(ptr)) { \ + LOCK_FREE_ACTION(type); \ + } \ + break; +#ifdef __SIZEOF_INT128__ +#define TRY_LOCK_FREE_CASE_16(p) TRY_LOCK_FREE_CASE(16, __uint128_t, p) +#else +#define TRY_LOCK_FREE_CASE_16(p) /* __uint128_t not available */ +#endif + +#define LOCK_FREE_CASES(ptr) \ + do { \ + switch (size) { \ + TRY_LOCK_FREE_CASE(1, uint8_t, ptr) \ + TRY_LOCK_FREE_CASE(2, uint16_t, ptr) \ + TRY_LOCK_FREE_CASE(4, uint32_t, ptr) \ + TRY_LOCK_FREE_CASE(8, uint64_t, ptr) \ + TRY_LOCK_FREE_CASE_16(ptr) /* __uint128_t may not be supported */ \ + default: \ + break; \ + } \ + } while (0) + +/// Whether atomic operations for the given size (and alignment) are lock-free. +bool __atomic_is_lock_free_c(size_t size, void *ptr) { +#define LOCK_FREE_ACTION(type) return true; + LOCK_FREE_CASES(ptr); +#undef LOCK_FREE_ACTION + return false; +} + +/// An atomic load operation. This is atomic with respect to the source +/// pointer only. +void __atomic_load_c(int size, void *src, void *dest, int model) { +#define LOCK_FREE_ACTION(type) \ + *((type *)dest) = __c11_atomic_load((_Atomic(type) *)src, model); \ + return; + LOCK_FREE_CASES(src); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(src); + lock(l); + memcpy(dest, src, size); + unlock(l); +} + +/// An atomic store operation. This is atomic with respect to the destination +/// pointer only. +void __atomic_store_c(int size, void *dest, void *src, int model) { +#define LOCK_FREE_ACTION(type) \ + __c11_atomic_store((_Atomic(type) *)dest, *(type *)src, model); \ + return; + LOCK_FREE_CASES(dest); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(dest); + lock(l); + memcpy(dest, src, size); + unlock(l); +} + +/// Atomic compare and exchange operation. If the value at *ptr is identical +/// to the value at *expected, then this copies value at *desired to *ptr. If +/// they are not, then this stores the current value from *ptr in *expected. 
+/// +/// This function returns 1 if the exchange takes place or 0 if it fails. +int __atomic_compare_exchange_c(int size, void *ptr, void *expected, + void *desired, int success, int failure) { +#define LOCK_FREE_ACTION(type) \ + return __c11_atomic_compare_exchange_strong( \ + (_Atomic(type) *)ptr, (type *)expected, *(type *)desired, success, \ + failure) + LOCK_FREE_CASES(ptr); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + if (memcmp(ptr, expected, size) == 0) { + memcpy(ptr, desired, size); + unlock(l); + return 1; + } + memcpy(expected, ptr, size); + unlock(l); + return 0; +} + +/// Performs an atomic exchange operation between two pointers. This is atomic +/// with respect to the target address. +void __atomic_exchange_c(int size, void *ptr, void *val, void *old, int model) { +#define LOCK_FREE_ACTION(type) \ + *(type *)old = \ + __c11_atomic_exchange((_Atomic(type) *)ptr, *(type *)val, model); \ + return; + LOCK_FREE_CASES(ptr); +#undef LOCK_FREE_ACTION + Lock *l = lock_for_pointer(ptr); + lock(l); + memcpy(old, ptr, size); + memcpy(ptr, val, size); + unlock(l); +} + +//////////////////////////////////////////////////////////////////////////////// +// Where the size is known at compile time, the compiler may emit calls to +// specialised versions of the above functions. 
+//////////////////////////////////////////////////////////////////////////////// +#ifdef __SIZEOF_INT128__ +#define OPTIMISED_CASES \ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) \ + OPTIMISED_CASE(16, IS_LOCK_FREE_16, __uint128_t) +#else +#define OPTIMISED_CASES \ + OPTIMISED_CASE(1, IS_LOCK_FREE_1, uint8_t) \ + OPTIMISED_CASE(2, IS_LOCK_FREE_2, uint16_t) \ + OPTIMISED_CASE(4, IS_LOCK_FREE_4, uint32_t) \ + OPTIMISED_CASE(8, IS_LOCK_FREE_8, uint64_t) +#endif + +#define OPTIMISED_CASE(n, lockfree, type) \ + type __atomic_load_##n(type *src, int model) { \ + if (lockfree(src)) \ + return __c11_atomic_load((_Atomic(type) *)src, model); \ + Lock *l = lock_for_pointer(src); \ + lock(l); \ + type val = *src; \ + unlock(l); \ + return val; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type) \ + void __atomic_store_##n(type *dest, type val, int model) { \ + if (lockfree(dest)) { \ + __c11_atomic_store((_Atomic(type) *)dest, val, model); \ + return; \ + } \ + Lock *l = lock_for_pointer(dest); \ + lock(l); \ + *dest = val; \ + unlock(l); \ + return; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type) \ + type __atomic_exchange_##n(type *dest, type val, int model) { \ + if (lockfree(dest)) \ + return __c11_atomic_exchange((_Atomic(type) *)dest, val, model); \ + Lock *l = lock_for_pointer(dest); \ + lock(l); \ + type tmp = *dest; \ + *dest = val; \ + unlock(l); \ + return tmp; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +#define OPTIMISED_CASE(n, lockfree, type) \ + bool __atomic_compare_exchange_##n(type *ptr, type *expected, type desired, \ + int success, int failure) { \ + if (lockfree(ptr)) \ + return __c11_atomic_compare_exchange_strong( \ + (_Atomic(type) *)ptr, expected, desired, success, failure); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); 
\ + if (*ptr == *expected) { \ + *ptr = desired; \ + unlock(l); \ + return true; \ + } \ + *expected = *ptr; \ + unlock(l); \ + return false; \ + } +OPTIMISED_CASES +#undef OPTIMISED_CASE + +//////////////////////////////////////////////////////////////////////////////// +// Atomic read-modify-write operations for integers of various sizes. +//////////////////////////////////////////////////////////////////////////////// +#define ATOMIC_RMW(n, lockfree, type, opname, op) \ + type __atomic_fetch_##opname##_##n(type *ptr, type val, int model) { \ + if (lockfree(ptr)) \ + return __c11_atomic_fetch_##opname((_Atomic(type) *)ptr, val, model); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + type tmp = *ptr; \ + *ptr = tmp op val; \ + unlock(l); \ + return tmp; \ + } + +#define ATOMIC_RMW_NAND(n, lockfree, type) \ + type __atomic_fetch_nand_##n(type *ptr, type val, int model) { \ + if (lockfree(ptr)) \ + return __c11_atomic_fetch_nand((_Atomic(type) *)ptr, val, model); \ + Lock *l = lock_for_pointer(ptr); \ + lock(l); \ + type tmp = *ptr; \ + *ptr = ~(tmp & val); \ + unlock(l); \ + return tmp; \ + } + +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, add, +) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, sub, -) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, and, &) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, or, |) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW(n, lockfree, type, xor, ^) +OPTIMISED_CASES +#undef OPTIMISED_CASE +// Allow build with clang without __c11_atomic_fetch_nand builtin (pre-14) +#if __has_builtin(__c11_atomic_fetch_nand) +#define OPTIMISED_CASE(n, lockfree, type) ATOMIC_RMW_NAND(n, lockfree, type) +OPTIMISED_CASES +#undef OPTIMISED_CASE +#endif diff --git 
a/wasmrt/llvm-builtins/builtins/atomic_flag_clear.c b/wasmrt/llvm-builtins/builtins/atomic_flag_clear.c new file mode 100644 index 0000000..983e5d7 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic_flag_clear.c @@ -0,0 +1,25 @@ +//===-- atomic_flag_clear.c -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic_flag_clear from C11's stdatomic.h. +// +//===----------------------------------------------------------------------===// + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_clear +void atomic_flag_clear(volatile atomic_flag *object) { + __c11_atomic_store(&(object)->_Value, 0, __ATOMIC_SEQ_CST); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/atomic_flag_clear_explicit.c b/wasmrt/llvm-builtins/builtins/atomic_flag_clear_explicit.c new file mode 100644 index 0000000..e61c064 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic_flag_clear_explicit.c @@ -0,0 +1,26 @@ +//===-- atomic_flag_clear_explicit.c --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic_flag_clear_explicit from C11's stdatomic.h. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_clear_explicit +void atomic_flag_clear_explicit(volatile atomic_flag *object, + memory_order order) { + __c11_atomic_store(&(object)->_Value, 0, order); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/atomic_flag_test_and_set.c b/wasmrt/llvm-builtins/builtins/atomic_flag_test_and_set.c new file mode 100644 index 0000000..ee22b08 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic_flag_test_and_set.c @@ -0,0 +1,25 @@ +//===-- atomic_flag_test_and_set.c ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic_flag_test_and_set from C11's stdatomic.h. +// +//===----------------------------------------------------------------------===// + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_test_and_set +_Bool atomic_flag_test_and_set(volatile atomic_flag *object) { + return __c11_atomic_exchange(&(object)->_Value, 1, __ATOMIC_SEQ_CST); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/atomic_flag_test_and_set_explicit.c b/wasmrt/llvm-builtins/builtins/atomic_flag_test_and_set_explicit.c new file mode 100644 index 0000000..8c9d039 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic_flag_test_and_set_explicit.c @@ -0,0 +1,26 @@ +//===-- atomic_flag_test_and_set_explicit.c -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic_flag_test_and_set_explicit from C11's stdatomic.h +// +//===----------------------------------------------------------------------===// + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_flag_test_and_set_explicit +_Bool atomic_flag_test_and_set_explicit(volatile atomic_flag *object, + memory_order order) { + return __c11_atomic_exchange(&(object)->_Value, 1, order); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/atomic_signal_fence.c b/wasmrt/llvm-builtins/builtins/atomic_signal_fence.c new file mode 100644 index 0000000..f4f5169 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic_signal_fence.c @@ -0,0 +1,25 @@ +//===-- atomic_signal_fence.c ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic_signal_fence from C11's stdatomic.h. 
+// +//===----------------------------------------------------------------------===// + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_signal_fence +void atomic_signal_fence(memory_order order) { + __c11_atomic_signal_fence(order); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/atomic_thread_fence.c b/wasmrt/llvm-builtins/builtins/atomic_thread_fence.c new file mode 100644 index 0000000..5659ecb --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/atomic_thread_fence.c @@ -0,0 +1,25 @@ +//===-- atomic_thread_fence.c ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements atomic_thread_fence from C11's stdatomic.h. +// +//===----------------------------------------------------------------------===// + +#ifndef __has_include +#define __has_include(inc) 0 +#endif + +#if __has_include(<stdatomic.h>) + +#include <stdatomic.h> +#undef atomic_thread_fence +void atomic_thread_fence(memory_order order) { + __c11_atomic_thread_fence(order); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/avr/divmodhi4.S b/wasmrt/llvm-builtins/builtins/avr/divmodhi4.S new file mode 100644 index 0000000..3717133 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/avr/divmodhi4.S @@ -0,0 +1,57 @@ +//===------------- divmodhi4.S - sint16 div & mod -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// As described at
+// https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention, the
+// prototype is `struct {sint16, sint16} __divmodhi4(sint16, sint16)`.
+// The sint16 quotient is returned via R23:R22, and the sint16 remainder is
+// returned via R25:R24, while registers R21/R26/27/Rtmp and bit T in SREG
+// are clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+#ifdef __AVR_TINY__
+    .set __tmp_reg__, 16
+#else
+    .set __tmp_reg__, 0
+#endif
+
+    .globl __divmodhi4
+    .type __divmodhi4, @function
+
+__divmodhi4:
+    bst r25, 7 ; T = bit 7 of r25, the sign of the value in R25:R24.
+    mov __tmp_reg__, r23 ; Keep the high byte of R23:R22; its bit 7 is that value's sign.
+    brtc __divmodhi4_a ; T clear: R25:R24 is non-negative, nothing to negate.
+    com __tmp_reg__ ; Flip the saved sign: bit 7 is now the XOR of both input signs.
+    rcall __divmodhi4_b ; Negate R25:R24, then continue at __divmodhi4_a.
+
+__divmodhi4_a:
+    sbrc r23, 7 ; Skip the call below unless R23:R22 is negative.
+    rcall __divmodhi4_c ; Negate R23:R22.
+    rcall __udivmodhi4 ; Call __udivmodhi4 to do real calculation.
+    sbrc __tmp_reg__, 7 ; Skip the call below unless the input signs differed.
+    rcall __divmodhi4_c ; Negate the quotient in R23:R22.
+    brtc __divmodhi4_exit ; T clear: keep the remainder as is; otherwise fall through and negate it.
+
+__divmodhi4_b:
+    com r25 ; com/neg/sbci together form the two's-complement negation of R25:R24.
+    neg r24
+    sbci r25, 255
+    ret ; Return quotient via R23:R22 and remainder via R25:R24.
+
+__divmodhi4_c:
+    com r23 ; com/neg/sbci together form the two's-complement negation of R23:R22.
+    neg r22
+    sbci r23, 255
+
+__divmodhi4_exit:
+    ret ; Return quotient via R23:R22 and remainder via R25:R24.
diff --git a/wasmrt/llvm-builtins/builtins/avr/divmodqi4.S b/wasmrt/llvm-builtins/builtins/avr/divmodqi4.S
new file mode 100644
index 0000000..66cfc0c
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/avr/divmodqi4.S
@@ -0,0 +1,44 @@
+//===------------- divmodqi4.S - sint8 div & mod --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// As described at
+// https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention, the
+// prototype is `struct {sint8, sint8} __divmodqi4(sint8, sint8)`.
+// The sint8 quotient is returned via R24, and the sint8 remainder is returned
+// via R25, while registers R23/Rtmp and bit T in SREG are clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+#ifdef __AVR_TINY__
+    .set __tmp_reg__, 16
+#else
+    .set __tmp_reg__, 0
+#endif
+
+    .globl __divmodqi4
+    .type __divmodqi4, @function
+
+__divmodqi4:
+    bst r24, 7 ; T = bit 7 of r24, the sign of the value passed in R24.
+    mov __tmp_reg__, r24
+    eor __tmp_reg__, r22 ; Bit 7 of Rtmp = XOR of the signs of R24 and R22.
+    sbrc r24, 7 ; Take the absolute value of R24 ...
+    neg r24
+    sbrc r22, 7 ; ... and of R22 before the unsigned division.
+    neg r22
+    rcall __udivmodqi4 ; Call __udivmodqi4 to do real calculation.
+    brtc __divmodqi4_1 ; T clear: leave the remainder in R25 unchanged.
+    neg r25 ; Otherwise negate the remainder.
+
+__divmodqi4_1:
+    sbrc __tmp_reg__, 7 ; Negate the quotient only when the input signs differed.
+    neg r24
+    ret ; Return quotient via R24 and remainder via R25.
diff --git a/wasmrt/llvm-builtins/builtins/avr/exit.S b/wasmrt/llvm-builtins/builtins/avr/exit.S
new file mode 100644
index 0000000..3cd9c5d
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/avr/exit.S
@@ -0,0 +1,18 @@
+//===------------ exit.S - global terminator for AVR ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+    .globl _exit
+    .type _exit, @function
+
+_exit:
+    cli ; Disable all interrupts.
+__stop_program:
+    rjmp __stop_program ; Fall into an infinite loop.
diff --git a/wasmrt/llvm-builtins/builtins/avr/mulhi3.S b/wasmrt/llvm-builtins/builtins/avr/mulhi3.S
new file mode 100644
index 0000000..d65f52f
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/avr/mulhi3.S
@@ -0,0 +1,71 @@
+//===------------ mulhi3.S - int16 multiplication -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The corresponding C code is something like:
+//
+// int __mulhi3(int A, int B) {
+//   int S = 0;
+//   while (A != 0) {
+//     if (A & 1)
+//       S += B;
+//     A = ((unsigned int) A) >> 1;
+//     B <<= 1;
+//   }
+//   return S;
+// }
+//
+// __mulhi3 has special ABI, as the implementation of libgcc, R25:R24 is used
+// to return result, while Rtmp/R21/R22/R23 are clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+#ifdef __AVR_TINY__
+    .set __tmp_reg__, 16
+    .set __zero_reg__, 17
+#else
+    .set __tmp_reg__, 0
+    .set __zero_reg__, 1
+#endif
+
+    .globl __mulhi3
+    .type __mulhi3, @function
+
+__mulhi3:
+    ; Use Rzero:Rtmp to store the result.
+    clr __tmp_reg__
+    clr __zero_reg__ ; S = 0;
+
+__mulhi3_loop:
+    clr r21 ; r21 = 0, used as the zero operand of the 16-bit compare.
+    cp r24, r21
+    cpc r25, r21
+    breq __mulhi3_end ; while (A != 0) {
+
+    mov r21, r24
+    andi r21, 1 ; Z is set when the low bit of A is clear.
+    breq __mulhi3_loop_a ; if (A & 1)
+    add __tmp_reg__, r22
+    adc __zero_reg__, r23 ; S += B;
+
+__mulhi3_loop_a:
+    lsr r25
+    ror r24 ; A = ((unsigned int) A) >> 1;
+    lsl r22
+    rol r23 ; B <<= 1;
+    rjmp __mulhi3_loop ; }
+
+__mulhi3_end:
+    ; Return the result via R25:R24.
+    mov r24, __tmp_reg__
+    mov r25, __zero_reg__
+    ; Restore __zero_reg__ to 0.
+    clr __zero_reg__
+    ret ; return S;
diff --git a/wasmrt/llvm-builtins/builtins/avr/mulqi3.S b/wasmrt/llvm-builtins/builtins/avr/mulqi3.S
new file mode 100644
index 0000000..914735c
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/avr/mulqi3.S
@@ -0,0 +1,53 @@
+//===------------ mulqi3.S - int8 multiplication --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The corresponding C code is something like:
+//
+// char __mulqi3(char A, char B) {
+//   int S = 0;
+//   while (A != 0) {
+//     if (A & 1)
+//       S += B;
+//     B <<= 1;
+//     A = ((unsigned char) A) >> 1;
+//   }
+//   return S;
+// }
+//
+// __mulqi3 has special ABI, as the implementation of libgcc, the result is
+// returned via R24, while Rtmp and R22 are clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+#ifdef __AVR_TINY__
+    .set __tmp_reg__, 16
+#else
+    .set __tmp_reg__, 0
+#endif
+
+    .globl __mulqi3
+    .type __mulqi3, @function
+
+__mulqi3:
+    clr __tmp_reg__ ; S = 0;
+
+__mulqi3_loop:
+    cpi r24, 0
+    breq __mulqi3_end ; while (A != 0) {
+    sbrc r24, 0 ; if (A & 1)
+    add __tmp_reg__, r22 ; S += B;
+    add r22, r22 ; B <<= 1;
+    lsr r24 ; A = ((unsigned char) A) >> 1;
+    rjmp __mulqi3_loop ; }
+
+__mulqi3_end:
+    mov r24, __tmp_reg__ ; Move the accumulated S into the return register.
+    ret ; return S;
diff --git a/wasmrt/llvm-builtins/builtins/avr/udivmodhi4.S b/wasmrt/llvm-builtins/builtins/avr/udivmodhi4.S
new file mode 100644
index 0000000..0e52b86
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/avr/udivmodhi4.S
@@ -0,0 +1,49 @@
+//===------------ udivmodhi4.S - uint16 div & mod -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// As described at
+// https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention, the
+// prototype is `struct {uint16, uint16} __udivmodhi4(uint16, uint16)`.
+// The uint16 quotient is returned via R23:R22, and the uint16 remainder is
+// returned via R25:R24, while R21/R26/R27 are clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+    .globl __udivmodhi4
+    .type __udivmodhi4, @function
+
+__udivmodhi4:
+    sub r26, r26
+    sub r27, r27 ; Initialize the remainder to zero.
+    ldi r21, 17 ; Only loop 16 rounds for uint16.
+
+__udivmodhi4_loop:
+    adc r24, r24 ; Rotate R25:R24 left through carry; the carry-in is the inverted quotient bit.
+    adc r25, r25
+    dec r21 ; Count the round; dec leaves the carry flag untouched.
+    breq __udivmodhi4_end
+    adc r26, r26 ; Shift the bit that left R25:R24 into the remainder R27:R26.
+    adc r27, r27
+    cp r26, r22
+    cpc r27, r23 ; Compare with the divisor.
+    brcs __udivmodhi4_loop ; Remainder < divisor: loop again with carry still set.
+    sub r26, r22
+    sbc r27, r23 ; Subtract the divisor.
+    rjmp __udivmodhi4_loop
+
+__udivmodhi4_end:
+    com r24 ; The quotient bits were collected inverted; complement them.
+    com r25
+    mov r22, r24
+    mov r23, r25 ; The quotient is returned in R23:R22.
+    mov r24, r26
+    mov r25, r27 ; The remainder is returned in R25:R24.
+    ret
diff --git a/wasmrt/llvm-builtins/builtins/avr/udivmodqi4.S b/wasmrt/llvm-builtins/builtins/avr/udivmodqi4.S
new file mode 100644
index 0000000..99aec34
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/avr/udivmodqi4.S
@@ -0,0 +1,39 @@
+//===------------ udivmodqi4.S - uint8 div & mod --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// As described at
+// https://gcc.gnu.org/wiki/avr-gcc#Exceptions_to_the_Calling_Convention, the
+// prototype is `struct {uint8, uint8} __udivmodqi4(uint8, uint8)`.
+// The uint8 quotient is returned via R24, and the uint8 remainder is returned
+// via R25, while R23 is clobbered.
+//
+//===----------------------------------------------------------------------===//
+
+    .text
+    .align 2
+
+    .globl __udivmodqi4
+    .type __udivmodqi4, @function
+
+__udivmodqi4:
+    sub r25, r25 ; Initialize the remainder to zero.
+    ldi r23, 9 ; Only loop 8 rounds for uint8.
+
+__udivmodqi4_loop:
+    adc r24, r24 ; Rotate R24 left through carry; the carry-in is the inverted quotient bit.
+    dec r23 ; Count the round; dec leaves the carry flag untouched.
+    breq __udivmodqi4_end
+    adc r25, r25 ; Shift the bit that left R24 into the remainder.
+    cp r25, r22 ; Compare with the divisor.
+    brcs __udivmodqi4_loop ; Remainder < divisor: loop again with carry still set.
+    sub r25, r22 ; Subtract the divisor.
+    rjmp __udivmodqi4_loop
+
+__udivmodqi4_end:
+    com r24 ; The uint8 quotient is returned via R24.
+    ret ; The uint8 remainder is returned via R25.
diff --git a/wasmrt/llvm-builtins/builtins/bswapdi2.c b/wasmrt/llvm-builtins/builtins/bswapdi2.c
new file mode 100644
index 0000000..cd049f5
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/bswapdi2.c
@@ -0,0 +1,25 @@
+//===-- bswapdi2.c - Implement __bswapdi2 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __bswapdi2 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { + return ( + (((u)&0xff00000000000000ULL) >> 56) | + (((u)&0x00ff000000000000ULL) >> 40) | + (((u)&0x0000ff0000000000ULL) >> 24) | + (((u)&0x000000ff00000000ULL) >> 8) | + (((u)&0x00000000ff000000ULL) << 8) | + (((u)&0x0000000000ff0000ULL) << 24) | + (((u)&0x000000000000ff00ULL) << 40) | + (((u)&0x00000000000000ffULL) << 56)); +} diff --git a/wasmrt/llvm-builtins/builtins/bswapsi2.c b/wasmrt/llvm-builtins/builtins/bswapsi2.c new file mode 100644 index 0000000..ec566d6 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/bswapsi2.c @@ -0,0 +1,20 @@ +//===-- bswapsi2.c - Implement __bswapsi2 ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __bswapsi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { + return ((((u)&0xff000000) >> 24) | + (((u)&0x00ff0000) >> 8) | + (((u)&0x0000ff00) << 8) | + (((u)&0x000000ff) << 24)); +} diff --git a/wasmrt/llvm-builtins/builtins/clear_cache.c b/wasmrt/llvm-builtins/builtins/clear_cache.c new file mode 100644 index 0000000..2ac99b2 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/clear_cache.c @@ -0,0 +1,212 @@ +//===-- clear_cache.c - Implement __clear_cache ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#if defined(__linux__)
+#include <assert.h>
+#endif
+#include <stddef.h>
+
+#if __APPLE__
+#include <libkern/OSCacheControl.h>
+#endif
+
+#if defined(_WIN32)
+// Forward declare Win32 APIs since the GCC mode driver does not handle the
+// newer SDKs as well as needed.
+uint32_t FlushInstructionCache(uintptr_t hProcess, void *lpBaseAddress,
+                               uintptr_t dwSize);
+uintptr_t GetCurrentProcess(void);
+#endif
+
+#if defined(__FreeBSD__) && defined(__arm__)
+// clang-format off
+#include <sys/types.h>
+#include <machine/sysarch.h>
+// clang-format on
+#endif
+
+#if defined(__NetBSD__) && defined(__arm__)
+#include <machine/sysarch.h>
+#endif
+
+#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv))
+// clang-format off
+#include <sys/types.h>
+#include <machine/sysarch.h>
+// clang-format on
+#endif
+
+#if defined(__linux__) && defined(__mips__)
+#include <sys/cachectl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
+#if defined(__linux__) && defined(__riscv)
+// to get platform-specific syscall definitions
+#include <linux/unistd.h>
+#endif
+
+// The compiler generates calls to __clear_cache() when creating
+// trampoline functions on the stack for use with nested functions.
+// It is expected to invalidate the instruction cache for the
+// specified range.
+
+void __clear_cache(void *start, void *end) {
+#if __i386__ || __x86_64__ || defined(_M_IX86) || defined(_M_X64)
+// Intel processors have a unified instruction and data cache
+// so there is nothing to do
+#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
+  FlushInstructionCache(GetCurrentProcess(), start, end - start);
+#elif defined(__arm__) && !defined(__APPLE__)
+#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
+  struct arm_sync_icache_args arg;
+
+  arg.addr = (uintptr_t)start;
+  arg.len = (uintptr_t)end - (uintptr_t)start;
+
+  sysarch(ARM_SYNC_ICACHE, &arg);
+#elif defined(__linux__)
+// We used to include asm/unistd.h for the __ARM_NR_cacheflush define, but
+// it also brought many other unused defines, as well as a dependency on
+// kernel headers to be installed.
+//
+// This value is stable at least since Linux 3.13 and should remain so for
+// compatibility reasons, warranting its re-definition here.
+#define __ARM_NR_cacheflush 0x0f0002
+  register int start_reg __asm("r0") = (int)(intptr_t)start;
+  const register int end_reg __asm("r1") = (int)(intptr_t)end;
+  const register int flags __asm("r2") = 0;
+  const register int syscall_nr __asm("r7") = __ARM_NR_cacheflush;
+  __asm __volatile("svc 0x0"
+                   : "=r"(start_reg)
+                   : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
+  assert(start_reg == 0 && "Cache flush syscall failed.");
+#else
+  compilerrt_abort();
+#endif
+#elif defined(__linux__) && defined(__loongarch__)
+  __asm__ volatile("ibar 0");
+#elif defined(__mips__)
+  const uintptr_t start_int = (uintptr_t)start;
+  const uintptr_t end_int = (uintptr_t)end;
+  uintptr_t synci_step;
+  __asm__ volatile("rdhwr %0, $1" : "=r"(synci_step));
+  if (synci_step != 0) {
+#if __mips_isa_rev >= 6
+    for (uintptr_t p = start_int; p < end_int; p += synci_step)
+      __asm__ volatile("synci 0(%0)" : : "r"(p));
+
+    // The last "move $at, $0" is the target of jr.hb instead of delay slot.
+    __asm__ volatile(".set noat\n"
+                     "sync\n"
+                     "addiupc $at, 12\n"
+                     "jr.hb $at\n"
+                     "move $at, $0\n"
+                     ".set at");
+#elif defined(__linux__) || defined(__OpenBSD__)
+    // Pre-R6 may not be globalized. And some implementations may give strange
+    // synci_step. So, let's use libc call for it.
+    _flush_cache(start, end_int - start_int, BCACHE);
+#else
+    (void)start_int;
+    (void)end_int;
+    compilerrt_abort();
+#endif
+  }
+#elif defined(__aarch64__) && !defined(__APPLE__)
+  uint64_t xstart = (uint64_t)(uintptr_t)start;
+  uint64_t xend = (uint64_t)(uintptr_t)end;
+
+  // Get Cache Type Info.
+  static uint64_t ctr_el0 = 0;
+  if (ctr_el0 == 0)
+    __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
+
+  // The DC and IC instructions must use 64-bit registers so we don't use
+  // uintptr_t in case this runs in an ILP32 environment.
+  uint64_t addr;
+
+  // If CTR_EL0.IDC is set, data cache cleaning to the point of unification
+  // is not required for instruction to data coherence.
+  if (((ctr_el0 >> 28) & 0x1) == 0x0) {
+    const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
+    for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
+         addr += dcache_line_size)
+      __asm __volatile("dc cvau, %0" ::"r"(addr));
+  }
+  __asm __volatile("dsb ish");
+
+  // If CTR_EL0.DIC is set, instruction cache invalidation to the point of
+  // unification is not required for instruction to data coherence.
+  if (((ctr_el0 >> 29) & 0x1) == 0x0) {
+    const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
+    for (addr = xstart & ~(icache_line_size - 1); addr < xend;
+         addr += icache_line_size)
+      __asm __volatile("ic ivau, %0" ::"r"(addr));
+    __asm __volatile("dsb ish");
+  }
+  __asm __volatile("isb sy");
+#elif defined(__powerpc__)
+  // Newer CPUs have a bigger line size made of multiple blocks, so the
+  // following value is a minimal common denominator for what used to be
+  // a single block cache line and is therefore inefficient.
+  const size_t line_size = 32;
+  const size_t len = (uintptr_t)end - (uintptr_t)start;
+
+  const uintptr_t mask = ~(line_size - 1);
+  const uintptr_t start_line = ((uintptr_t)start) & mask;
+  const uintptr_t end_line = ((uintptr_t)start + len + line_size - 1) & mask;
+
+  for (uintptr_t line = start_line; line < end_line; line += line_size)
+    __asm__ volatile("dcbf 0, %0" : : "r"(line));
+  __asm__ volatile("sync");
+
+  for (uintptr_t line = start_line; line < end_line; line += line_size)
+    __asm__ volatile("icbi 0, %0" : : "r"(line));
+  __asm__ volatile("isync");
+#elif defined(__sparc__)
+  const size_t dword_size = 8;
+  const size_t len = (uintptr_t)end - (uintptr_t)start;
+
+  const uintptr_t mask = ~(dword_size - 1);
+  const uintptr_t start_dword = ((uintptr_t)start) & mask;
+  const uintptr_t end_dword = ((uintptr_t)start + len + dword_size - 1) & mask;
+
+  for (uintptr_t dword = start_dword; dword < end_dword; dword += dword_size)
+    __asm__ volatile("flush %0" : : "r"(dword));
+#elif defined(__riscv) && defined(__linux__)
+  // See: arch/riscv/include/asm/cacheflush.h, arch/riscv/kernel/sys_riscv.c
+  register void *start_reg __asm("a0") = start;
+  const register void *end_reg __asm("a1") = end;
+  // "0" means that we clear cache for all threads (SYS_RISCV_FLUSH_ICACHE_ALL)
+  const register long flags __asm("a2") = 0;
+  const register long syscall_nr __asm("a7") = __NR_riscv_flush_icache;
+  __asm __volatile("ecall"
+                   : "=r"(start_reg)
+                   : "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
+  assert(start_reg == 0 && "Cache flush syscall failed.");
+#elif defined(__riscv) && defined(__OpenBSD__)
+  struct riscv_sync_icache_args arg;
+
+  arg.addr = (uintptr_t)start;
+  arg.len = (uintptr_t)end - (uintptr_t)start;
+
+  sysarch(RISCV_SYNC_ICACHE, &arg);
+#elif defined(__ve__)
+  __asm__ volatile("fencec 2");
+#else
+#if __APPLE__
+  // On Darwin, sys_icache_invalidate() provides this functionality
+  sys_icache_invalidate(start, end - start);
+#else
+  compilerrt_abort(); // no cache-flush recipe for this target
+#endif
+#endif
+}
diff --git a/wasmrt/llvm-builtins/builtins/clzdi2.c b/wasmrt/llvm-builtins/builtins/clzdi2.c
new file mode 100644
index 0000000..12c1798
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/clzdi2.c
@@ -0,0 +1,35 @@
+//===-- clzdi2.c - Implement __clzdi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+#if !defined(__clang__) &&                                                     \
+    ((defined(__sparc__) && defined(__arch64__)) || defined(__mips64) ||       \
+     (defined(__riscv) && __SIZEOF_POINTER__ >= 8))
+// On 64-bit architectures with neither a native clz instruction nor a native
+// ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than
+// __clzsi2, leading to infinite recursion.
+#define __builtin_clz(a) __clzsi2(a)
+extern int __clzsi2(si_int);
+#endif
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __clzdi2(di_int a) {
+  dwords x;
+  x.all = a;
+  const si_int f = -(x.s.high == 0); // all-ones mask when the high word is zero, else 0
+  return clzsi((x.s.high & ~f) | (x.s.low & f)) + // count within whichever word the mask selects
+         (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); // plus one word's bit width if the high word was empty
+}
diff --git a/wasmrt/llvm-builtins/builtins/clzsi2.c b/wasmrt/llvm-builtins/builtins/clzsi2.c
new file mode 100644
index 0000000..d75f56d
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/clzsi2.c
@@ -0,0 +1,48 @@
+//===-- clzsi2.c - Implement __clzsi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __clzsi2(si_int a) {
+  su_int x = (su_int)a; // work on the unsigned bit pattern so right shifts fill with zeros
+  si_int t = ((x & 0xFFFF0000) == 0) << 4; // if (x is small) t = 16 else 0
+  x >>= 16 - t;                            // x = [0 - 0xFFFF]
+  su_int r = t;                            // r = [0, 16]
+  // return r + clz(x)
+  t = ((x & 0xFF00) == 0) << 3; // t = 8 if the upper byte of x is zero, else 0
+  x >>= 8 - t; // x = [0 - 0xFF]
+  r += t;      // r = [0, 8, 16, 24]
+  // return r + clz(x)
+  t = ((x & 0xF0) == 0) << 2; // t = 4 if the upper nibble of x is zero, else 0
+  x >>= 4 - t; // x = [0 - 0xF]
+  r += t;      // r = [0, 4, 8, 12, 16, 20, 24, 28]
+  // return r + clz(x)
+  t = ((x & 0xC) == 0) << 1; // t = 2 if the upper two bits of x are zero, else 0
+  x >>= 2 - t; // x = [0 - 3]
+  r += t;      // r = [0 - 30] and is even
+  // return r + clz(x)
+  //     switch (x)
+  //     {
+  //     case 0:
+  //         return r + 2;
+  //     case 1:
+  //         return r + 1;
+  //     case 2:
+  //     case 3:
+  //         return r;
+  //     }
+  return r + ((2 - x) & -((x & 2) == 0)); // branch-free form of the switch above
+}
diff --git a/wasmrt/llvm-builtins/builtins/clzti2.c b/wasmrt/llvm-builtins/builtins/clzti2.c
new file mode 100644
index 0000000..25d3011
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/clzti2.c
@@ -0,0 +1,29 @@
+//===-- clzti2.c - Implement __clzti2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzti2 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: the number of leading 0-bits + +// Precondition: a != 0 + +COMPILER_RT_ABI int __clzti2(ti_int a) { + twords x; + x.all = a; + const di_int f = -(x.s.high == 0); + return __builtin_clzll((x.s.high & ~f) | (x.s.low & f)) + + ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/cmpdi2.c b/wasmrt/llvm-builtins/builtins/cmpdi2.c new file mode 100644 index 0000000..951db85 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/cmpdi2.c @@ -0,0 +1,42 @@ +//===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __cmpdi2 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: if (a < b) returns 0 +// if (a == b) returns 1 +// if (a > b) returns 2 + +COMPILER_RT_ABI si_int __cmpdi2(di_int a, di_int b) { + dwords x; + x.all = a; + dwords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#ifdef __ARM_EABI__ +// Returns: if (a < b) returns -1 +// if (a == b) returns 0 +// if (a > b) returns 1 +COMPILER_RT_ABI si_int __aeabi_lcmp(di_int a, di_int b) { + return __cmpdi2(a, b) - 1; +} +#endif diff --git a/wasmrt/llvm-builtins/builtins/cmpti2.c b/wasmrt/llvm-builtins/builtins/cmpti2.c new file mode 100644 index 0000000..7f0ee1b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/cmpti2.c @@ -0,0 +1,37 @@ +//===-- cmpti2.c - Implement __cmpti2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __cmpti2 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: if (a < b) returns 0 +// if (a == b) returns 1 +// if (a > b) returns 2 + +COMPILER_RT_ABI si_int __cmpti2(ti_int a, ti_int b) { + twords x; + x.all = a; + twords y; + y.all = b; + if (x.s.high < y.s.high) + return 0; + if (x.s.high > y.s.high) + return 2; + if (x.s.low < y.s.low) + return 0; + if (x.s.low > y.s.low) + return 2; + return 1; +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/comparedf2.c b/wasmrt/llvm-builtins/builtins/comparedf2.c new file mode 100644 index 0000000..e1fc12c --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/comparedf2.c @@ -0,0 +1,77 @@ +//===-- lib/comparedf2.c - Double-precision comparisons -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// // This file implements the following soft-float comparison routines: +// +// __eqdf2 __gedf2 __unorddf2 +// __ledf2 __gtdf2 +// __ltdf2 +// __nedf2 +// +// The semantics of the routines grouped in each column are identical, so there +// is a single implementation for each, and wrappers to provide the other names. +// +// The main routines behave as follows: +// +// __ledf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// 1 if either a or b is NaN +// +// __gedf2(a,b) returns -1 if a < b +// 0 if a == b +// 1 if a > b +// -1 if either a or b is NaN +// +// __unorddf2(a,b) returns 0 if both a and b are numbers +// 1 if either a or b is NaN +// +// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of +// NaN values. 
+// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#include "fp_compare_impl.inc" + +COMPILER_RT_ABI CMP_RESULT __ledf2(fp_t a, fp_t b) { return __leXf2__(a, b); } + +#if defined(__ELF__) +// Alias for libgcc compatibility +COMPILER_RT_ALIAS(__ledf2, __cmpdf2) +#endif +COMPILER_RT_ALIAS(__ledf2, __eqdf2) +COMPILER_RT_ALIAS(__ledf2, __ltdf2) +COMPILER_RT_ALIAS(__ledf2, __nedf2) + +COMPILER_RT_ABI CMP_RESULT __gedf2(fp_t a, fp_t b) { return __geXf2__(a, b); } + +COMPILER_RT_ALIAS(__gedf2, __gtdf2) + +COMPILER_RT_ABI CMP_RESULT __unorddf2(fp_t a, fp_t b) { + return __unordXf2__(a, b); +} + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI int __aeabi_dcmpun(fp_t a, fp_t b) { return __unorddf2(a, b); } +#else +COMPILER_RT_ALIAS(__unorddf2, __aeabi_dcmpun) +#endif +#endif + +#if defined(_WIN32) && !defined(__MINGW32__) +// The alias mechanism doesn't work on Windows except for MinGW, so emit +// wrapper functions. +int __eqdf2(fp_t a, fp_t b) { return __ledf2(a, b); } +int __ltdf2(fp_t a, fp_t b) { return __ledf2(a, b); } +int __nedf2(fp_t a, fp_t b) { return __ledf2(a, b); } +int __gtdf2(fp_t a, fp_t b) { return __gedf2(a, b); } +#endif diff --git a/wasmrt/llvm-builtins/builtins/comparesf2.c b/wasmrt/llvm-builtins/builtins/comparesf2.c new file mode 100644 index 0000000..b8a9554 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/comparesf2.c @@ -0,0 +1,77 @@ +//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+//   __eqsf2   __gesf2   __unordsf2
+//   __lesf2   __gtsf2
+//   __ltsf2
+//   __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+//   __lesf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                         1 if either a or b is NaN
+//
+//   __gesf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                        -1 if either a or b is NaN
+//
+//   __unordsf2(a,b) returns 0 if both a and b are numbers
+//                           1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "fp_compare_impl.inc"
+
+COMPILER_RT_ABI CMP_RESULT __lesf2(fp_t a, fp_t b) { return __leXf2__(a, b); }
+
+#if defined(__ELF__)
+// Alias for libgcc compatibility
+COMPILER_RT_ALIAS(__lesf2, __cmpsf2)
+#endif // __ELF__
+COMPILER_RT_ALIAS(__lesf2, __eqsf2)
+COMPILER_RT_ALIAS(__lesf2, __ltsf2)
+COMPILER_RT_ALIAS(__lesf2, __nesf2)
+
+COMPILER_RT_ABI CMP_RESULT __gesf2(fp_t a, fp_t b) { return __geXf2__(a, b); }
+
+COMPILER_RT_ALIAS(__gesf2, __gtsf2)
+
+COMPILER_RT_ABI CMP_RESULT __unordsf2(fp_t a, fp_t b) {
+  return __unordXf2__(a, b);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI int __aeabi_fcmpun(fp_t a, fp_t b) { return __unordsf2(a, b); }
+#else
+COMPILER_RT_ALIAS(__unordsf2, __aeabi_fcmpun)
+#endif // COMPILER_RT_ARMHF_TARGET
+#endif // __ARM_EABI__
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+// The alias mechanism doesn't work on Windows except for MinGW, so emit
+// wrapper functions.
+int __eqsf2(fp_t a, fp_t b) { return __lesf2(a, b); }
+int __ltsf2(fp_t a, fp_t b) { return __lesf2(a, b); }
+int __nesf2(fp_t a, fp_t b) { return __lesf2(a, b); }
+int __gtsf2(fp_t a, fp_t b) { return __gesf2(a, b); }
+#endif // _WIN32 && !__MINGW32__
diff --git a/wasmrt/llvm-builtins/builtins/comparetf2.c b/wasmrt/llvm-builtins/builtins/comparetf2.c
new file mode 100644
index 0000000..be5e9e5
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/comparetf2.c
@@ -0,0 +1,63 @@
+//===-- lib/comparetf2.c - Quad-precision comparisons -------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+//   __eqtf2   __getf2   __unordtf2
+//   __letf2   __gttf2
+//   __lttf2
+//   __netf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+//   __letf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                         1 if either a or b is NaN
+//
+//   __getf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                        -1 if either a or b is NaN
+//
+//   __unordtf2(a,b) returns 0 if both a and b are numbers
+//                           1 if either a or b is NaN
+//
+// Note that __letf2( ) and __getf2( ) are identical except in their handling of
+// NaN values.
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_TF_MODE) +#include "fp_compare_impl.inc" + +COMPILER_RT_ABI CMP_RESULT __letf2(fp_t a, fp_t b) { return __leXf2__(a, b); } + +#if defined(__ELF__) +// Alias for libgcc compatibility +COMPILER_RT_ALIAS(__letf2, __cmptf2) +#endif +COMPILER_RT_ALIAS(__letf2, __eqtf2) +COMPILER_RT_ALIAS(__letf2, __lttf2) +COMPILER_RT_ALIAS(__letf2, __netf2) + +COMPILER_RT_ABI CMP_RESULT __getf2(fp_t a, fp_t b) { return __geXf2__(a, b); } + +COMPILER_RT_ALIAS(__getf2, __gttf2) + +COMPILER_RT_ABI CMP_RESULT __unordtf2(fp_t a, fp_t b) { + return __unordXf2__(a, b); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/cpu_model.c b/wasmrt/llvm-builtins/builtins/cpu_model.c new file mode 100644 index 0000000..0bd7296 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/cpu_model.c @@ -0,0 +1,1390 @@ +//===-- cpu_model.c - Support for __cpu_model builtin ------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is based on LLVM's lib/Support/Host.cpp. +// It implements the operating system Host concept and builtin +// __cpu_model for the compiler_rt library for x86 and +// __aarch64_have_lse_atomics, __aarch64_cpu_features for AArch64. +// +//===----------------------------------------------------------------------===// + +#ifndef __has_attribute +#define __has_attribute(attr) 0 +#endif + +#if __has_attribute(constructor) +#if __GNUC__ >= 9 +// Ordinarily init priorities below 101 are disallowed as they are reserved for the +// implementation. However, we are the implementation, so silence the diagnostic, +// since it doesn't apply to us. 
+#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif +// We're choosing init priority 90 to force our constructors to run before any +// constructors in the end user application (starting at priority 101). This value +// matches the libgcc choice for the same functions. +#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90))) +#else +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE +#endif + +#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ + defined(_M_X64)) && \ + (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) + +#include <assert.h> + +#define bool int +#define true 1 +#define false 0 + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +enum VendorSignatures { + SIG_INTEL = 0x756e6547, // Genu + SIG_AMD = 0x68747541, // Auth +}; + +enum ProcessorVendors { + VENDOR_INTEL = 1, + VENDOR_AMD, + VENDOR_OTHER, + VENDOR_MAX +}; + +enum ProcessorTypes { + INTEL_BONNELL = 1, + INTEL_CORE2, + INTEL_COREI7, + AMDFAM10H, + AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + INTEL_KNM, + INTEL_GOLDMONT, + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + AMDFAM19H, + ZHAOXIN_FAM7H, + INTEL_SIERRAFOREST, + INTEL_GRANDRIDGE, + CPU_TYPE_MAX +}; + +enum ProcessorSubtypes { + INTEL_COREI7_NEHALEM = 1, + INTEL_COREI7_WESTMERE, + INTEL_COREI7_SANDYBRIDGE, + AMDFAM10H_BARCELONA, + AMDFAM10H_SHANGHAI, + AMDFAM10H_ISTANBUL, + AMDFAM15H_BDVER1, + AMDFAM15H_BDVER2, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, + INTEL_COREI7_IVYBRIDGE, + INTEL_COREI7_HASWELL, + INTEL_COREI7_BROADWELL, + INTEL_COREI7_SKYLAKE, + INTEL_COREI7_SKYLAKE_AVX512, + INTEL_COREI7_CANNONLAKE, + INTEL_COREI7_ICELAKE_CLIENT, + INTEL_COREI7_ICELAKE_SERVER, + AMDFAM17H_ZNVER2, + INTEL_COREI7_CASCADELAKE, + INTEL_COREI7_TIGERLAKE, + INTEL_COREI7_COOPERLAKE, + INTEL_COREI7_SAPPHIRERAPIDS, + INTEL_COREI7_ALDERLAKE, + AMDFAM19H_ZNVER3, +
INTEL_COREI7_ROCKETLAKE, + ZHAOXIN_FAM7H_LUJIAZUI, + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, + INTEL_COREI7_GRANITERAPIDS_D, + CPU_SUBTYPE_MAX +}; + +enum ProcessorFeatures { + FEATURE_CMOV = 0, + FEATURE_MMX, + FEATURE_POPCNT, + FEATURE_SSE, + FEATURE_SSE2, + FEATURE_SSE3, + FEATURE_SSSE3, + FEATURE_SSE4_1, + FEATURE_SSE4_2, + FEATURE_AVX, + FEATURE_AVX2, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + FEATURE_AVX512VBMI2, + FEATURE_GFNI, + FEATURE_VPCLMULQDQ, + FEATURE_AVX512VNNI, + FEATURE_AVX512BITALG, + FEATURE_AVX512BF16, + FEATURE_AVX512VP2INTERSECT, + CPU_FEATURE_MAX +}; + +// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). +// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID +// support. Consequently, for i386, the presence of CPUID is checked first +// via the corresponding eflags bit. +static bool isCpuIdSupported(void) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__i386__) + int __cpuid_supported; + __asm__(" pushfl\n" + " popl %%eax\n" + " movl %%eax,%%ecx\n" + " xorl $0x00200000,%%eax\n" + " pushl %%eax\n" + " popfl\n" + " pushfl\n" + " popl %%eax\n" + " movl $0,%0\n" + " cmpl %%eax,%%ecx\n" + " je 1f\n" + " movl $1,%0\n" + "1:" + : "=r"(__cpuid_supported) + : + : "eax", "ecx"); + if (!__cpuid_supported) + return false; +#endif + return true; +#endif + return true; +} + +// This code is copied from lib/Support/Host.cpp. +// Changes to either file should be mirrored in the other. + +/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in +/// the specified arguments. If we can't run cpuid on the host, return true. 
+static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, + unsigned *rECX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? + __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value)); + return false; +#else + return true; +#endif +#elif defined(_MSC_VER) + // The MSVC intrinsic is portable across x86 and x64. + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +/// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return +/// the 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, + unsigned *rEAX, unsigned *rEBX, unsigned *rECX, + unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) +#if defined(__x86_64__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. + // FIXME: should we save this for Clang? 
+ __asm__("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#elif defined(__i386__) + __asm__("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) + : "a"(value), "c"(subleaf)); + return false; +#else + return true; +#endif +#elif defined(_MSC_VER) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +// Read control register 0 (XCR0). Used to detect features such as AVX. +static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) + // Check xgetbv; this uses a .byte sequence instead of the instruction + // directly because older assemblers do not include support for xgetbv and + // there is no easy way to conditionally compile based on the assembler used. + __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); + return false; +#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) + unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); + *rEAX = Result; + *rEDX = Result >> 32; + return false; +#else + return true; +#endif +} + +static void detectX86FamilyModel(unsigned EAX, unsigned *Family, + unsigned *Model) { + *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (*Family == 6 || *Family == 0xf) { + if (*Family == 0xf) + // Examine extended family ID if family ID is F. + *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. 
+ *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +static const char * +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { +#define testFeature(F) \ + (Features[F / 32] & (1 << (F % 32))) != 0 + + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + + switch (Family) { + case 6: + switch (Model) { + case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile + // processor, Intel Core 2 Quad processor, Intel Core 2 Quad + // mobile processor, Intel Core 2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm process. + case 0x16: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + CPU = "core2"; + *Type = INTEL_CORE2; + break; + case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model + // 17h. All processors are manufactured using the 45 nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + case 0x1d: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + CPU = "penryn"; + *Type = INTEL_CORE2; + break; + case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. + // As found in a Summer 2010 model iMac. + case 0x1f: + case 0x2e: // Nehalem EX + CPU = "nehalem"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_NEHALEM; + break; + case 0x25: // Intel Core i7, laptop version. + case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 32 nm process. + case 0x2f: // Westmere EX + CPU = "westmere"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_WESTMERE; + break; + case 0x2a: // Intel Core i7 processor. 
All processors are manufactured + // using the 32 nm process. + case 0x2d: + CPU = "sandybridge"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SANDYBRIDGE; + break; + case 0x3a: + case 0x3e: // Ivy Bridge EP + CPU = "ivybridge"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_IVYBRIDGE; + break; + + // Haswell: + case 0x3c: + case 0x3f: + case 0x45: + case 0x46: + CPU = "haswell"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_HASWELL; + break; + + // Broadwell: + case 0x3d: + case 0x47: + case 0x4f: + case 0x56: + CPU = "broadwell"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_BROADWELL; + break; + + // Skylake: + case 0x4e: // Skylake mobile + case 0x5e: // Skylake desktop + case 0x8e: // Kaby Lake mobile + case 0x9e: // Kaby Lake desktop + case 0xa5: // Comet Lake-H/S + case 0xa6: // Comet Lake-U + CPU = "skylake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SKYLAKE; + break; + + // Rocketlake: + case 0xa7: + CPU = "rocketlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ROCKETLAKE; + break; + + // Skylake Xeon: + case 0x55: + *Type = INTEL_COREI7; + if (testFeature(FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + *Subtype = INTEL_COREI7_COOPERLAKE; + } else if (testFeature(FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + *Subtype = INTEL_COREI7_CASCADELAKE; + } else { + CPU = "skylake-avx512"; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; + } + break; + + // Cannonlake: + case 0x66: + CPU = "cannonlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_CANNONLAKE; + break; + + // Icelake: + case 0x7d: + case 0x7e: + CPU = "icelake-client"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ICELAKE_CLIENT; + break; + + // Tigerlake: + case 0x8c: + case 0x8d: + CPU = "tigerlake"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_TIGERLAKE; + break; + + // Alderlake: + case 0x97: + case 0x9a: + // Raptorlake: + case 0xb7: + // Meteorlake: + case 0xaa: + case 0xac: + CPU = "alderlake"; + *Type = INTEL_COREI7; + *Subtype = 
INTEL_COREI7_ALDERLAKE; + break; + + // Icelake Xeon: + case 0x6a: + case 0x6c: + CPU = "icelake-server"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_ICELAKE_SERVER; + break; + + // Emerald Rapids: + case 0xcf: + // Sapphire Rapids: + case 0x8f: + CPU = "sapphirerapids"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_SAPPHIRERAPIDS; + break; + + // Granite Rapids: + case 0xad: + CPU = "graniterapids"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_GRANITERAPIDS; + break; + + // Granite Rapids D: + case 0xae: + CPU = "graniterapids-d"; + *Type = INTEL_COREI7; + *Subtype = INTEL_COREI7_GRANITERAPIDS_D; + break; + + case 0x1c: // Most 45 nm Intel Atom processors + case 0x26: // 45 nm Atom Lincroft + case 0x27: // 32 nm Atom Medfield + case 0x35: // 32 nm Atom Midview + case 0x36: // 32 nm Atom Midview + CPU = "bonnell"; + *Type = INTEL_BONNELL; + break; + + // Atom Silvermont codes from the Intel software optimization guide. + case 0x37: + case 0x4a: + case 0x4d: + case 0x5a: + case 0x5d: + case 0x4c: // really airmont + CPU = "silvermont"; + *Type = INTEL_SILVERMONT; + break; + // Goldmont: + case 0x5c: // Apollo Lake + case 0x5f: // Denverton + CPU = "goldmont"; + *Type = INTEL_GOLDMONT; + break; // "goldmont" + case 0x7a: + CPU = "goldmont-plus"; + *Type = INTEL_GOLDMONT_PLUS; + break; + case 0x86: + CPU = "tremont"; + *Type = INTEL_TREMONT; + break; + + // Sierraforest: + case 0xaf: + CPU = "sierraforest"; + *Type = INTEL_SIERRAFOREST; + break; + + // Grandridge: + case 0xb6: + CPU = "grandridge"; + *Type = INTEL_GRANDRIDGE; + break; + + case 0x57: + CPU = "knl"; + *Type = INTEL_KNL; + break; + + case 0x85: + CPU = "knm"; + *Type = INTEL_KNM; + break; + + default: // Unknown family 6 CPU. + break; + } + break; + default: + break; // Unknown. 
+ } + + return CPU; +} + +static const char * +getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + + switch (Family) { + case 16: + CPU = "amdfam10"; + *Type = AMDFAM10H; + switch (Model) { + case 2: + *Subtype = AMDFAM10H_BARCELONA; + break; + case 4: + *Subtype = AMDFAM10H_SHANGHAI; + break; + case 8: + *Subtype = AMDFAM10H_ISTANBUL; + break; + } + break; + case 20: + CPU = "btver1"; + *Type = AMD_BTVER1; + break; + case 21: + CPU = "bdver1"; + *Type = AMDFAM15H; + if (Model >= 0x60 && Model <= 0x7f) { + CPU = "bdver4"; + *Subtype = AMDFAM15H_BDVER4; + break; // 60h-7Fh: Excavator + } + if (Model >= 0x30 && Model <= 0x3f) { + CPU = "bdver3"; + *Subtype = AMDFAM15H_BDVER3; + break; // 30h-3Fh: Steamroller + } + if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + CPU = "bdver2"; + *Subtype = AMDFAM15H_BDVER2; + break; // 02h, 10h-1Fh: Piledriver + } + if (Model <= 0x0f) { + *Subtype = AMDFAM15H_BDVER1; + break; // 00h-0Fh: Bulldozer + } + break; + case 22: + CPU = "btver2"; + *Type = AMD_BTVER2; + break; + case 23: + CPU = "znver1"; + *Type = AMDFAM17H; + if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { + CPU = "znver2"; + *Subtype = AMDFAM17H_ZNVER2; + break; // 30h-3fh, 71h: Zen2 + } + if (Model <= 0x0f) { + *Subtype = AMDFAM17H_ZNVER1; + break; // 00h-0Fh: Zen1 + } + break; + case 25: + CPU = "znver3"; + *Type = AMDFAM19H; + if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x5f)) { + // Family 19h Models 00h-0Fh - Zen3 + // Family 19h Models 20h-2Fh - Zen3 + // Family 19h Models 30h-3Fh - Zen3 + // Family 19h Models 40h-4Fh - Zen3+ + // Family 19h Models 50h-5Fh - Zen3+ + *Subtype = AMDFAM19H_ZNVER3; + break; + } + if ((Model >= 0x10 && Model <= 0x1f) || + (Model >= 0x60 && Model <= 0x74) || + (Model >= 0x78 && Model <= 0x7b) || + (Model >= 0xA0 && 
Model <= 0xAf)) { + CPU = "znver4"; + *Subtype = AMDFAM19H_ZNVER4; + break; // "znver4" + } + break; + default: + break; // Unknown AMD CPU. + } + + return CPU; +} + +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *Features) { + unsigned EAX = 0, EBX = 0; + +#define setFeature(F) \ + Features[F / 32] |= 1U << (F % 32) + + if ((EDX >> 15) & 1) + setFeature(FEATURE_CMOV); + if ((EDX >> 23) & 1) + setFeature(FEATURE_MMX); + if ((EDX >> 25) & 1) + setFeature(FEATURE_SSE); + if ((EDX >> 26) & 1) + setFeature(FEATURE_SSE2); + + if ((ECX >> 0) & 1) + setFeature(FEATURE_SSE3); + if ((ECX >> 1) & 1) + setFeature(FEATURE_PCLMUL); + if ((ECX >> 9) & 1) + setFeature(FEATURE_SSSE3); + if ((ECX >> 12) & 1) + setFeature(FEATURE_FMA); + if ((ECX >> 19) & 1) + setFeature(FEATURE_SSE4_1); + if ((ECX >> 20) & 1) + setFeature(FEATURE_SSE4_2); + if ((ECX >> 23) & 1) + setFeature(FEATURE_POPCNT); + if ((ECX >> 25) & 1) + setFeature(FEATURE_AES); + + // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV + // indicates that the AVX registers will be saved and restored on context + // switch, then we have full AVX support. + const unsigned AVXBits = (1 << 27) | (1 << 28); + bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); +#if defined(__APPLE__) + // Darwin lazily saves the AVX512 context on first use: trust that the OS will + // save the AVX512 context if we use AVX512 instructions, even if the bit is not + // set right now. + bool HasAVX512Save = true; +#else + // AVX512 requires additional context to be saved by the OS.
+ bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); +#endif + + if (HasAVX) + setFeature(FEATURE_AVX); + + bool HasLeaf7 = + MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + + if (HasLeaf7 && ((EBX >> 3) & 1)) + setFeature(FEATURE_BMI); + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + setFeature(FEATURE_AVX2); + if (HasLeaf7 && ((EBX >> 8) & 1)) + setFeature(FEATURE_BMI2); + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512F); + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512DQ); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512IFMA); + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512PF); + if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512ER); + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512CD); + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BW); + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VL); + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 8) & 1)) + setFeature(FEATURE_GFNI); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + setFeature(FEATURE_VPCLMULQDQ); + if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VNNI); + if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BITALG); + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VPOPCNTDQ); + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX5124VNNIW); + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VP2INTERSECT); + + // EAX from subleaf 0 is the 
maximum subleaf supported. Some CPUs don't + // return all 0s for invalid subleaves so check the limit. + bool HasLeaf7Subleaf1 = + HasLeaf7 && EAX >= 1 && + !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BF16); + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + setFeature(FEATURE_SSE4_A); + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + setFeature(FEATURE_XOP); + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + setFeature(FEATURE_FMA4); +#undef setFeature +} + +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +int __cpu_indicator_init(void) CONSTRUCTOR_ATTRIBUTE; + +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +struct __processor_model { + unsigned int __cpu_vendor; + unsigned int __cpu_type; + unsigned int __cpu_subtype; + unsigned int __cpu_features[1]; +} __cpu_model = {0, 0, 0, {0}}; + +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +unsigned int __cpu_features2 = 0; + +// A constructor function that sets __cpu_model and __cpu_features2 with +// the right values. This needs to run only once. This constructor is +// given the highest priority and it should run before constructors without +// the priority set. However, it still runs after ifunc initializers and +// needs to be called explicitly there. + +int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) { + unsigned EAX, EBX, ECX, EDX; + unsigned MaxLeaf = 5; + unsigned Vendor; + unsigned Model, Family; + unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0}; + + // This function needs to run just once.
+ if (__cpu_model.__cpu_vendor) + return 0; + + if (!isCpuIdSupported() || + getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) { + __cpu_model.__cpu_vendor = VENDOR_OTHER; + return -1; + } + + getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); + detectX86FamilyModel(EAX, &Family, &Model); + + // Find available features. + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features[0]); + + assert((sizeof(Features)/sizeof(Features[0])) == 2); + __cpu_model.__cpu_features[0] = Features[0]; + __cpu_features2 = Features[1]; + + if (Vendor == SIG_INTEL) { + // Get CPU type. + getIntelProcessorTypeAndSubtype(Family, Model, &Features[0], + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_INTEL; + } else if (Vendor == SIG_AMD) { + // Get CPU type. + getAMDProcessorTypeAndSubtype(Family, Model, &Features[0], + &(__cpu_model.__cpu_type), + &(__cpu_model.__cpu_subtype)); + __cpu_model.__cpu_vendor = VENDOR_AMD; + } else + __cpu_model.__cpu_vendor = VENDOR_OTHER; + + assert(__cpu_model.__cpu_vendor < VENDOR_MAX); + assert(__cpu_model.__cpu_type < CPU_TYPE_MAX); + assert(__cpu_model.__cpu_subtype < CPU_SUBTYPE_MAX); + + return 0; +} +#elif defined(__aarch64__) + +// LSE support detection for out-of-line atomics +// using HWCAP and Auxiliary vector +_Bool __aarch64_have_lse_atomics + __attribute__((visibility("hidden"), nocommon)); + +#if defined(__has_include) +#if __has_include(<sys/auxv.h>) +#include <sys/auxv.h> + +#if __has_include(<sys/ifunc.h>) +#include <sys/ifunc.h> +#else +typedef struct __ifunc_arg_t { + unsigned long _size; + unsigned long _hwcap; + unsigned long _hwcap2; +} __ifunc_arg_t; +#endif // __has_include(<sys/ifunc.h>) + +#if __has_include(<asm/hwcap.h>) +#include <asm/hwcap.h> + +#if defined(__ANDROID__) +#include <string.h> +#include <sys/system_properties.h> +#elif defined(__Fuchsia__) +#include <zircon/features.h> +#include <zircon/syscalls.h> +#endif + +#ifndef _IFUNC_ARG_HWCAP +#define _IFUNC_ARG_HWCAP (1ULL << 62) +#endif +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif +#ifndef HWCAP_CPUID +#define HWCAP_CPUID (1 << 11) +#endif +#ifndef HWCAP_FP +#define HWCAP_FP (1
<< 0) +#endif +#ifndef HWCAP_ASIMD +#define HWCAP_ASIMD (1 << 1) +#endif +#ifndef HWCAP_AES +#define HWCAP_AES (1 << 3) +#endif +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1 << 4) +#endif +#ifndef HWCAP_SHA1 +#define HWCAP_SHA1 (1 << 5) +#endif +#ifndef HWCAP_SHA2 +#define HWCAP_SHA2 (1 << 6) +#endif +#ifndef HWCAP_ATOMICS +#define HWCAP_ATOMICS (1 << 8) +#endif +#ifndef HWCAP_FPHP +#define HWCAP_FPHP (1 << 9) +#endif +#ifndef HWCAP_ASIMDHP +#define HWCAP_ASIMDHP (1 << 10) +#endif +#ifndef HWCAP_ASIMDRDM +#define HWCAP_ASIMDRDM (1 << 12) +#endif +#ifndef HWCAP_JSCVT +#define HWCAP_JSCVT (1 << 13) +#endif +#ifndef HWCAP_FCMA +#define HWCAP_FCMA (1 << 14) +#endif +#ifndef HWCAP_LRCPC +#define HWCAP_LRCPC (1 << 15) +#endif +#ifndef HWCAP_DCPOP +#define HWCAP_DCPOP (1 << 16) +#endif +#ifndef HWCAP_SHA3 +#define HWCAP_SHA3 (1 << 17) +#endif +#ifndef HWCAP_SM3 +#define HWCAP_SM3 (1 << 18) +#endif +#ifndef HWCAP_SM4 +#define HWCAP_SM4 (1 << 19) +#endif +#ifndef HWCAP_ASIMDDP +#define HWCAP_ASIMDDP (1 << 20) +#endif +#ifndef HWCAP_SHA512 +#define HWCAP_SHA512 (1 << 21) +#endif +#ifndef HWCAP_SVE +#define HWCAP_SVE (1 << 22) +#endif +#ifndef HWCAP_ASIMDFHM +#define HWCAP_ASIMDFHM (1 << 23) +#endif +#ifndef HWCAP_DIT +#define HWCAP_DIT (1 << 24) +#endif +#ifndef HWCAP_ILRCPC +#define HWCAP_ILRCPC (1 << 26) +#endif +#ifndef HWCAP_FLAGM +#define HWCAP_FLAGM (1 << 27) +#endif +#ifndef HWCAP_SSBS +#define HWCAP_SSBS (1 << 28) +#endif +#ifndef HWCAP_SB +#define HWCAP_SB (1 << 29) +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif +#ifndef HWCAP2_DCPODP +#define HWCAP2_DCPODP (1 << 0) +#endif +#ifndef HWCAP2_SVE2 +#define HWCAP2_SVE2 (1 << 1) +#endif +#ifndef HWCAP2_SVEAES +#define HWCAP2_SVEAES (1 << 2) +#endif +#ifndef HWCAP2_SVEPMULL +#define HWCAP2_SVEPMULL (1 << 3) +#endif +#ifndef HWCAP2_SVEBITPERM +#define HWCAP2_SVEBITPERM (1 << 4) +#endif +#ifndef HWCAP2_SVESHA3 +#define HWCAP2_SVESHA3 (1 << 5) +#endif +#ifndef HWCAP2_SVESM4 +#define HWCAP2_SVESM4 (1 << 6) +#endif 
+#ifndef HWCAP2_FLAGM2 +#define HWCAP2_FLAGM2 (1 << 7) +#endif +#ifndef HWCAP2_FRINT +#define HWCAP2_FRINT (1 << 8) +#endif +#ifndef HWCAP2_SVEI8MM +#define HWCAP2_SVEI8MM (1 << 9) +#endif +#ifndef HWCAP2_SVEF32MM +#define HWCAP2_SVEF32MM (1 << 10) +#endif +#ifndef HWCAP2_SVEF64MM +#define HWCAP2_SVEF64MM (1 << 11) +#endif +#ifndef HWCAP2_SVEBF16 +#define HWCAP2_SVEBF16 (1 << 12) +#endif +#ifndef HWCAP2_I8MM +#define HWCAP2_I8MM (1 << 13) +#endif +#ifndef HWCAP2_BF16 +#define HWCAP2_BF16 (1 << 14) +#endif +#ifndef HWCAP2_DGH +#define HWCAP2_DGH (1 << 15) +#endif +#ifndef HWCAP2_RNG +#define HWCAP2_RNG (1 << 16) +#endif +#ifndef HWCAP2_BTI +#define HWCAP2_BTI (1 << 17) +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif +#ifndef HWCAP2_RPRES +#define HWCAP2_RPRES (1 << 21) +#endif +#ifndef HWCAP2_MTE3 +#define HWCAP2_MTE3 (1 << 22) +#endif +#ifndef HWCAP2_SME +#define HWCAP2_SME (1 << 23) +#endif +#ifndef HWCAP2_SME_I16I64 +#define HWCAP2_SME_I16I64 (1 << 24) +#endif +#ifndef HWCAP2_SME_F64F64 +#define HWCAP2_SME_F64F64 (1 << 25) +#endif +#ifndef HWCAP2_WFXT +#define HWCAP2_WFXT (1UL << 31) +#endif +#ifndef HWCAP2_EBF16 +#define HWCAP2_EBF16 (1UL << 32) +#endif +#ifndef HWCAP2_SVE_EBF16 +#define HWCAP2_SVE_EBF16 (1UL << 33) +#endif + +// Detect Exynos 9810 CPU +#define IF_EXYNOS9810 \ + char arch[PROP_VALUE_MAX]; \ + if (__system_property_get("ro.arch", arch) > 0 && \ + strncmp(arch, "exynos9810", sizeof("exynos9810") - 1) == 0) + +static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) { +#if defined(__FreeBSD__) + unsigned long hwcap; + int result = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); + __aarch64_have_lse_atomics = result == 0 && (hwcap & HWCAP_ATOMICS) != 0; +#elif defined(__Fuchsia__) + // This ensures the vDSO is a direct link-time dependency of anything that + // needs this initializer code. 
+#pragma comment(lib, "zircon") + uint32_t features; + zx_status_t status = _zx_system_get_features(ZX_FEATURE_KIND_CPU, &features); + __aarch64_have_lse_atomics = + status == ZX_OK && (features & ZX_ARM64_FEATURE_ISA_ATOMICS) != 0; +#else + unsigned long hwcap = getauxval(AT_HWCAP); + _Bool result = (hwcap & HWCAP_ATOMICS) != 0; +#if defined(__ANDROID__) + if (result) { + // Some cores in the Exynos 9810 CPU are ARMv8.2 and others are ARMv8.0; + // only the former support LSE atomics. However, the kernel in the + // initial Android 8.0 release of Galaxy S9/S9+ devices incorrectly + // reported the feature as being supported. + // + // The kernel appears to have been corrected to mark it unsupported as of + // the Android 9.0 release on those devices, and this issue has not been + // observed anywhere else. Thus, this workaround may be removed if + // compiler-rt ever drops support for Android 8.0. + IF_EXYNOS9810 result = false; + } +#endif // defined(__ANDROID__) + __aarch64_have_lse_atomics = result; +#endif // defined(__FreeBSD__) +} + +#if !defined(DISABLE_AARCH64_FMV) +// CPUFeatures must correspond to the same AArch64 features in +// AArch64TargetParser.h +enum CPUFeatures { + FEAT_RNG, + FEAT_FLAGM, + FEAT_FLAGM2, + FEAT_FP16FML, + FEAT_DOTPROD, + FEAT_SM4, + FEAT_RDM, + FEAT_LSE, + FEAT_FP, + FEAT_SIMD, + FEAT_CRC, + FEAT_SHA1, + FEAT_SHA2, + FEAT_SHA3, + FEAT_AES, + FEAT_PMULL, + FEAT_FP16, + FEAT_DIT, + FEAT_DPB, + FEAT_DPB2, + FEAT_JSCVT, + FEAT_FCMA, + FEAT_RCPC, + FEAT_RCPC2, + FEAT_FRINTTS, + FEAT_DGH, + FEAT_I8MM, + FEAT_BF16, + FEAT_EBF16, + FEAT_RPRES, + FEAT_SVE, + FEAT_SVE_BF16, + FEAT_SVE_EBF16, + FEAT_SVE_I8MM, + FEAT_SVE_F32MM, + FEAT_SVE_F64MM, + FEAT_SVE2, + FEAT_SVE_AES, + FEAT_SVE_PMULL128, + FEAT_SVE_BITPERM, + FEAT_SVE_SHA3, + FEAT_SVE_SM4, + FEAT_SME, + FEAT_MEMTAG, + FEAT_MEMTAG2, + FEAT_MEMTAG3, + FEAT_SB, + FEAT_PREDRES, + FEAT_SSBS, + FEAT_SSBS2, + FEAT_BTI, + FEAT_LS64, + FEAT_LS64_V, + FEAT_LS64_ACCDATA, + FEAT_WFXT, + 
FEAT_SME_F64, + FEAT_SME_I64, + FEAT_SME2, + FEAT_MAX +}; + +// Architecture features used +// in Function Multi Versioning +struct { + unsigned long long features; + // As features grows new fields could be added +} __aarch64_cpu_features __attribute__((visibility("hidden"), nocommon)); + +void init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { +#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F +#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) +#define extractBits(val, start, number) \ + (val & ((1ULL << number) - 1ULL) << start) >> start + if (__aarch64_cpu_features.features) + return; + unsigned long hwcap2 = 0; + if (hwcap & _IFUNC_ARG_HWCAP) + hwcap2 = arg->_hwcap2; + if (hwcap & HWCAP_CRC32) + setCPUFeature(FEAT_CRC); + if (hwcap & HWCAP_PMULL) + setCPUFeature(FEAT_PMULL); + if (hwcap & HWCAP_FLAGM) + setCPUFeature(FEAT_FLAGM); + if (hwcap2 & HWCAP2_FLAGM2) { + setCPUFeature(FEAT_FLAGM); + setCPUFeature(FEAT_FLAGM2); + } + if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) + setCPUFeature(FEAT_SM4); + if (hwcap & HWCAP_ASIMDDP) + setCPUFeature(FEAT_DOTPROD); + if (hwcap & HWCAP_ASIMDFHM) + setCPUFeature(FEAT_FP16FML); + if (hwcap & HWCAP_FPHP) { + setCPUFeature(FEAT_FP16); + setCPUFeature(FEAT_FP); + } + if (hwcap & HWCAP_DIT) + setCPUFeature(FEAT_DIT); + if (hwcap & HWCAP_ASIMDRDM) + setCPUFeature(FEAT_RDM); + if (hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC2); + if (hwcap & HWCAP_AES) + setCPUFeature(FEAT_AES); + if (hwcap & HWCAP_SHA1) + setCPUFeature(FEAT_SHA1); + if (hwcap & HWCAP_SHA2) + setCPUFeature(FEAT_SHA2); + if (hwcap & HWCAP_JSCVT) + setCPUFeature(FEAT_JSCVT); + if (hwcap & HWCAP_FCMA) + setCPUFeature(FEAT_FCMA); + if (hwcap & HWCAP_SB) + setCPUFeature(FEAT_SB); + if (hwcap & HWCAP_SSBS) + setCPUFeature(FEAT_SSBS2); + if (hwcap2 & HWCAP2_MTE) { + setCPUFeature(FEAT_MEMTAG); + setCPUFeature(FEAT_MEMTAG2); + } + if (hwcap2 & HWCAP2_MTE3) { + setCPUFeature(FEAT_MEMTAG); + 
setCPUFeature(FEAT_MEMTAG2); + setCPUFeature(FEAT_MEMTAG3); + } + if (hwcap2 & HWCAP2_SVEAES) + setCPUFeature(FEAT_SVE_AES); + if (hwcap2 & HWCAP2_SVEPMULL) { + setCPUFeature(FEAT_SVE_AES); + setCPUFeature(FEAT_SVE_PMULL128); + } + if (hwcap2 & HWCAP2_SVEBITPERM) + setCPUFeature(FEAT_SVE_BITPERM); + if (hwcap2 & HWCAP2_SVESHA3) + setCPUFeature(FEAT_SVE_SHA3); + if (hwcap2 & HWCAP2_SVESM4) + setCPUFeature(FEAT_SVE_SM4); + if (hwcap2 & HWCAP2_DCPODP) + setCPUFeature(FEAT_DPB2); + if (hwcap & HWCAP_ATOMICS) + setCPUFeature(FEAT_LSE); + if (hwcap2 & HWCAP2_RNG) + setCPUFeature(FEAT_RNG); + if (hwcap2 & HWCAP2_I8MM) + setCPUFeature(FEAT_I8MM); + if (hwcap2 & HWCAP2_EBF16) + setCPUFeature(FEAT_EBF16); + if (hwcap2 & HWCAP2_SVE_EBF16) + setCPUFeature(FEAT_SVE_EBF16); + if (hwcap2 & HWCAP2_DGH) + setCPUFeature(FEAT_DGH); + if (hwcap2 & HWCAP2_FRINT) + setCPUFeature(FEAT_FRINTTS); + if (hwcap2 & HWCAP2_SVEI8MM) + setCPUFeature(FEAT_SVE_I8MM); + if (hwcap2 & HWCAP2_SVEF32MM) + setCPUFeature(FEAT_SVE_F32MM); + if (hwcap2 & HWCAP2_SVEF64MM) + setCPUFeature(FEAT_SVE_F64MM); + if (hwcap2 & HWCAP2_BTI) + setCPUFeature(FEAT_BTI); + if (hwcap2 & HWCAP2_RPRES) + setCPUFeature(FEAT_RPRES); + if (hwcap2 & HWCAP2_WFXT) + setCPUFeature(FEAT_WFXT); + if (hwcap2 & HWCAP2_SME) + setCPUFeature(FEAT_SME); + if (hwcap2 & HWCAP2_SME_I16I64) + setCPUFeature(FEAT_SME_I64); + if (hwcap2 & HWCAP2_SME_F64F64) + setCPUFeature(FEAT_SME_F64); + if (hwcap & HWCAP_CPUID) { + unsigned long ftr; + getCPUFeature(ID_AA64PFR1_EL1, ftr); + // ID_AA64PFR1_EL1.MTE >= 0b0001 + if (extractBits(ftr, 8, 4) >= 0x1) + setCPUFeature(FEAT_MEMTAG); + // ID_AA64PFR1_EL1.SSBS == 0b0001 + if (extractBits(ftr, 4, 4) == 0x1) + setCPUFeature(FEAT_SSBS); + // ID_AA64PFR1_EL1.SME == 0b0010 + if (extractBits(ftr, 24, 4) == 0x2) + setCPUFeature(FEAT_SME2); + getCPUFeature(ID_AA64PFR0_EL1, ftr); + // ID_AA64PFR0_EL1.FP != 0b1111 + if (extractBits(ftr, 16, 4) != 0xF) { + setCPUFeature(FEAT_FP); + // ID_AA64PFR0_EL1.AdvSIMD has the 
same value as ID_AA64PFR0_EL1.FP + setCPUFeature(FEAT_SIMD); + } + // ID_AA64PFR0_EL1.SVE != 0b0000 + if (extractBits(ftr, 32, 4) != 0x0) { + // get ID_AA64ZFR0_EL1, that name supported + // if sve enabled only + getCPUFeature(S3_0_C0_C4_4, ftr); + // ID_AA64ZFR0_EL1.SVEver == 0b0000 + if (extractBits(ftr, 0, 4) == 0x0) + setCPUFeature(FEAT_SVE); + // ID_AA64ZFR0_EL1.SVEver == 0b0001 + if (extractBits(ftr, 0, 4) == 0x1) + setCPUFeature(FEAT_SVE2); + // ID_AA64ZFR0_EL1.BF16 != 0b0000 + if (extractBits(ftr, 20, 4) != 0x0) + setCPUFeature(FEAT_SVE_BF16); + } + getCPUFeature(ID_AA64ISAR0_EL1, ftr); + // ID_AA64ISAR0_EL1.SHA3 != 0b0000 + if (extractBits(ftr, 32, 4) != 0x0) + setCPUFeature(FEAT_SHA3); + getCPUFeature(ID_AA64ISAR1_EL1, ftr); + // ID_AA64ISAR1_EL1.DPB >= 0b0001 + if (extractBits(ftr, 0, 4) >= 0x1) + setCPUFeature(FEAT_DPB); + // ID_AA64ISAR1_EL1.LRCPC != 0b0000 + if (extractBits(ftr, 20, 4) != 0x0) + setCPUFeature(FEAT_RCPC); + // ID_AA64ISAR1_EL1.SPECRES == 0b0001 + if (extractBits(ftr, 40, 4) == 0x2) + setCPUFeature(FEAT_PREDRES); + // ID_AA64ISAR1_EL1.BF16 != 0b0000 + if (extractBits(ftr, 44, 4) != 0x0) + setCPUFeature(FEAT_BF16); + // ID_AA64ISAR1_EL1.LS64 >= 0b0001 + if (extractBits(ftr, 60, 4) >= 0x1) + setCPUFeature(FEAT_LS64); + // ID_AA64ISAR1_EL1.LS64 >= 0b0010 + if (extractBits(ftr, 60, 4) >= 0x2) + setCPUFeature(FEAT_LS64_V); + // ID_AA64ISAR1_EL1.LS64 >= 0b0011 + if (extractBits(ftr, 60, 4) >= 0x3) + setCPUFeature(FEAT_LS64_ACCDATA); + } else { + // Set some features in case of no CPUID support + if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { + setCPUFeature(FEAT_FP); + // FP and AdvSIMD fields have the same value + setCPUFeature(FEAT_SIMD); + } + if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) + setCPUFeature(FEAT_DPB); + if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC); + if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) + setCPUFeature(FEAT_BF16); + if (hwcap2 & HWCAP2_SVEBF16) + setCPUFeature(FEAT_SVE_BF16); + if 
(hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) + setCPUFeature(FEAT_SVE2); + if (hwcap & HWCAP_SHA3) + setCPUFeature(FEAT_SHA3); + } + setCPUFeature(FEAT_MAX); +} + +void CONSTRUCTOR_ATTRIBUTE init_cpu_features(void) { + unsigned long hwcap; + unsigned long hwcap2; + // CPU features already initialized. + if (__aarch64_cpu_features.features) + return; +#if defined(__FreeBSD__) + int res = 0; + res = elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap); + res |= elf_aux_info(AT_HWCAP2, &hwcap2, sizeof hwcap2); + if (res) + return; +#else +#if defined(__ANDROID__) + // Don't set any CPU features, + // detection could be wrong on Exynos 9810. + IF_EXYNOS9810 return; +#endif // defined(__ANDROID__) + hwcap = getauxval(AT_HWCAP); + hwcap2 = getauxval(AT_HWCAP2); +#endif // defined(__FreeBSD__) + __ifunc_arg_t arg; + arg._size = sizeof(__ifunc_arg_t); + arg._hwcap = hwcap; + arg._hwcap2 = hwcap2; + init_cpu_features_resolver(hwcap | _IFUNC_ARG_HWCAP, &arg); +#undef extractBits +#undef getCPUFeature +#undef setCPUFeature +#undef IF_EXYNOS9810 +} +#endif // !defined(DISABLE_AARCH64_FMV) +#endif // defined(__has_include) +#endif // __has_include() +#endif // __has_include() +#endif // defined(__aarch64__) diff --git a/wasmrt/llvm-builtins/builtins/crtbegin.c b/wasmrt/llvm-builtins/builtins/crtbegin.c new file mode 100644 index 0000000..a0860ca --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/crtbegin.c @@ -0,0 +1,135 @@ +//===-- crtbegin.c - Start of constructors and destructors ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +__attribute__((visibility("hidden"))) void *__dso_handle = &__dso_handle; + +#ifdef EH_USE_FRAME_REGISTRY +__extension__ static void *__EH_FRAME_LIST__[] + __attribute__((section(".eh_frame"), aligned(sizeof(void *)))) = {}; + +extern void __register_frame_info(const void *, void *) __attribute__((weak)); +extern void *__deregister_frame_info(const void *) __attribute__((weak)); +#endif + +#ifndef CRT_HAS_INITFINI_ARRAY +typedef void (*fp)(void); + +static fp __CTOR_LIST__[] + __attribute__((section(".ctors"), aligned(sizeof(fp)))) = {(fp)-1}; +extern fp __CTOR_LIST_END__[]; +#endif + +extern void __cxa_finalize(void *) __attribute__((weak)); + +static void __attribute__((used)) __do_init(void) { + static _Bool __initialized; + if (__builtin_expect(__initialized, 0)) + return; + __initialized = 1; + +#ifdef EH_USE_FRAME_REGISTRY + static struct { void *p[8]; } __object; + if (__register_frame_info) + __register_frame_info(__EH_FRAME_LIST__, &__object); +#endif +#ifndef CRT_HAS_INITFINI_ARRAY + const size_t n = __CTOR_LIST_END__ - __CTOR_LIST__ - 1; + for (size_t i = n; i >= 1; i--) __CTOR_LIST__[i](); +#endif +} + +#ifdef CRT_HAS_INITFINI_ARRAY +__attribute__((section(".init_array"), + used)) static void (*__init)(void) = __do_init; +#elif defined(__i386__) || defined(__x86_64__) +__asm__(".pushsection .init,\"ax\",@progbits\n\t" + "call __do_init\n\t" + ".popsection"); +#elif defined(__riscv) +__asm__(".pushsection .init,\"ax\",%progbits\n\t" + "call __do_init\n\t" + ".popsection"); +#elif defined(__arm__) || defined(__aarch64__) +__asm__(".pushsection .init,\"ax\",%progbits\n\t" + "bl __do_init\n\t" + ".popsection"); +#elif defined(__mips__) +__asm__(".pushsection .init,\"ax\",@progbits\n\t" + "jal __do_init\n\t" + ".popsection"); +#elif defined(__powerpc__) || defined(__powerpc64__) 
+__asm__(".pushsection .init,\"ax\",@progbits\n\t" + "bl __do_init\n\t" + "nop\n\t" + ".popsection"); +#elif defined(__sparc__) +__asm__(".pushsection .init,\"ax\",@progbits\n\t" + "call __do_init\n\t" + ".popsection"); +#else +#error "crtbegin without .init_fini array unimplemented for this architecture" +#endif // CRT_HAS_INITFINI_ARRAY + +#ifndef CRT_HAS_INITFINI_ARRAY +static fp __DTOR_LIST__[] + __attribute__((section(".dtors"), aligned(sizeof(fp)))) = {(fp)-1}; +extern fp __DTOR_LIST_END__[]; +#endif + +static void __attribute__((used)) __do_fini(void) { + static _Bool __finalized; + if (__builtin_expect(__finalized, 0)) + return; + __finalized = 1; + + if (__cxa_finalize) + __cxa_finalize(__dso_handle); + +#ifndef CRT_HAS_INITFINI_ARRAY + const size_t n = __DTOR_LIST_END__ - __DTOR_LIST__ - 1; + for (size_t i = 1; i <= n; i++) __DTOR_LIST__[i](); +#endif +#ifdef EH_USE_FRAME_REGISTRY + if (__deregister_frame_info) + __deregister_frame_info(__EH_FRAME_LIST__); +#endif +} + +#ifdef CRT_HAS_INITFINI_ARRAY +__attribute__((section(".fini_array"), + used)) static void (*__fini)(void) = __do_fini; +#elif defined(__i386__) || defined(__x86_64__) +__asm__(".pushsection .fini,\"ax\",@progbits\n\t" + "call __do_fini\n\t" + ".popsection"); +#elif defined(__arm__) || defined(__aarch64__) +__asm__(".pushsection .fini,\"ax\",%progbits\n\t" + "bl __do_fini\n\t" + ".popsection"); +#elif defined(__mips__) +__asm__(".pushsection .fini,\"ax\",@progbits\n\t" + "jal __do_fini\n\t" + ".popsection"); +#elif defined(__powerpc__) || defined(__powerpc64__) +__asm__(".pushsection .fini,\"ax\",@progbits\n\t" + "bl __do_fini\n\t" + "nop\n\t" + ".popsection"); +#elif defined(__riscv) +__asm__(".pushsection .fini,\"ax\",@progbits\n\t" + "call __do_fini\n\t" + ".popsection"); +#elif defined(__sparc__) +__asm__(".pushsection .fini,\"ax\",@progbits\n\t" + "call __do_fini\n\t" + ".popsection"); +#else +#error "crtbegin without .init_fini array unimplemented for this architecture" +#endif // 
CRT_HAS_INIT_FINI_ARRAY diff --git a/wasmrt/llvm-builtins/builtins/crtend.c b/wasmrt/llvm-builtins/builtins/crtend.c new file mode 100644 index 0000000..ebcc60b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/crtend.c @@ -0,0 +1,22 @@ +//===-- crtend.c - End of constructors and destructors --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +// Put 4-byte zero which is the length field in FDE at the end as a terminator. +const int32_t __EH_FRAME_LIST_END__[] + __attribute__((section(".eh_frame"), aligned(sizeof(int32_t)), + visibility("hidden"), used)) = {0}; + +#ifndef CRT_HAS_INITFINI_ARRAY +typedef void (*fp)(void); +fp __CTOR_LIST_END__[] + __attribute__((section(".ctors"), visibility("hidden"), used)) = {0}; +fp __DTOR_LIST_END__[] + __attribute__((section(".dtors"), visibility("hidden"), used)) = {0}; +#endif diff --git a/wasmrt/llvm-builtins/builtins/ctzdi2.c b/wasmrt/llvm-builtins/builtins/ctzdi2.c new file mode 100644 index 0000000..26c908d --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ctzdi2.c @@ -0,0 +1,35 @@ +//===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ctzdi2 for the compiler_rt library. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits of a, computed from its two si_int halves
+
+#if !defined(__clang__) &&                                                     \
+    ((defined(__sparc__) && defined(__arch64__)) || defined(__mips64) ||       \
+     (defined(__riscv) && __SIZEOF_POINTER__ >= 8))
+// On 64-bit architectures with neither a native clz instruction nor a native
+// ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than
+// __ctzsi2, leading to infinite recursion.
+#define __builtin_ctz(a) __ctzsi2(a)
+extern int __ctzsi2(si_int);
+#endif
+
+// Precondition: a != 0
+// The half to scan is selected with the mask f rather than with a branch.
+COMPILER_RT_ABI int __ctzdi2(di_int a) {
+  dwords x;
+  x.all = a;
+  const si_int f = -(x.s.low == 0); // f = (low == 0) ? -1 : 0
+  return ctzsi((x.s.high & f) | (x.s.low & ~f)) + // scan high when low == 0, else low
+         (f & ((si_int)(sizeof(si_int) * CHAR_BIT))); // add the si_int bit width when high was scanned
+}
diff --git a/wasmrt/llvm-builtins/builtins/ctzsi2.c b/wasmrt/llvm-builtins/builtins/ctzsi2.c
new file mode 100644
index 0000000..ed95c60
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/ctzsi2.c
@@ -0,0 +1,53 @@
+//===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+// Method: branch-free narrowing over 16-, 8-, 4- and 2-bit groups; r accumulates the shifts.
+// Precondition: a != 0
+
+COMPILER_RT_ABI int __ctzsi2(si_int a) {
+  su_int x = (su_int)a; // work on the unsigned bit pattern
+  si_int t = ((x & 0x0000FFFF) == 0)
+             << 4; // if (x has no small bits) t = 16 else 0
+  x >>= t;         // x = [0 - 0xFFFF] + higher garbage bits
+  su_int r = t;    // r = [0, 16]
+  // return r + ctz(x)
+  t = ((x & 0x00FF) == 0) << 3; // t = 8 if the low byte is empty, else 0
+  x >>= t; // x = [0 - 0xFF] + higher garbage bits
+  r += t;  // r = [0, 8, 16, 24]
+  // return r + ctz(x)
+  t = ((x & 0x0F) == 0) << 2; // t = 4 if the low nibble is empty, else 0
+  x >>= t; // x = [0 - 0xF] + higher garbage bits
+  r += t;  // r = [0, 4, 8, 12, 16, 20, 24, 28]
+  // return r + ctz(x)
+  t = ((x & 0x3) == 0) << 1; // t = 2 if the low two bits are empty, else 0
+  x >>= t;
+  x &= 3; // x = [0 - 3]
+  r += t; // r = [0 - 30] and is even
+  // return r + ctz(x)
+
+  // The branch-less return statement below is equivalent
+  // to the following switch statement:
+  //     switch (x)
+  //     {
+  //     case 0:
+  //         return r + 2;
+  //     case 2:
+  //         return r + 1;
+  //     case 1:
+  //     case 3:
+  //         return r;
+  //     }
+  return r + ((2 - (x >> 1)) & -((x & 1) == 0)); // the mask is 0 when bit 0 is set, all ones otherwise
+}
diff --git a/wasmrt/llvm-builtins/builtins/ctzti2.c b/wasmrt/llvm-builtins/builtins/ctzti2.c
new file mode 100644
index 0000000..fb136d0
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/ctzti2.c
@@ -0,0 +1,29 @@
+//===-- ctzti2.c - Implement __ctzti2 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: the number of trailing 0-bits of a, computed from its two di_int halves
+
+// Precondition: a != 0
+// The half to scan is selected with the mask f rather than with a branch.
+COMPILER_RT_ABI int __ctzti2(ti_int a) {
+  twords x;
+  x.all = a;
+  const di_int f = -(x.s.low == 0); // f = (low == 0) ? -1 : 0
+  return __builtin_ctzll((x.s.high & f) | (x.s.low & ~f)) + // scan high when low == 0, else low
+         ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT))); // add the di_int bit width when high was scanned
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/divdc3.c b/wasmrt/llvm-builtins/builtins/divdc3.c
new file mode 100644
index 0000000..5581182
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divdc3.c
@@ -0,0 +1,55 @@
+//===-- divdc3.c - Implement __divdc3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divdc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include "int_math.h"
+
+// Returns: the quotient of (a + ib) / (c + id)
+// c and d are rescaled by the exponent of max(|c|, |d|) first; NaN results get a recovery pass.
+COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c,
+                                  double __d) {
+  int __ilogbw = 0;
+  double __logbw = __compiler_rt_logb(__compiler_rt_fmax(crt_fabs(__c),
+                                                         crt_fabs(__d)));
+  if (crt_isfinite(__logbw)) { // rescale only when that exponent is finite
+    __ilogbw = (int)__logbw;
+    __c = __compiler_rt_scalbn(__c, -__ilogbw);
+    __d = __compiler_rt_scalbn(__d, -__ilogbw);
+  }
+  double __denom = __c * __c + __d * __d;
+  Dcomplex z;
+  COMPLEX_REAL(z) =
+      __compiler_rt_scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+  COMPLEX_IMAGINARY(z) =
+      __compiler_rt_scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { // both parts NaN: try the special cases
+    if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) { // zero denominator, numerator not all NaN
+      COMPLEX_REAL(z) = crt_copysign(CRT_INFINITY, __c) * __a;
+      COMPLEX_IMAGINARY(z) = crt_copysign(CRT_INFINITY, __c) * __b;
+    } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
+               crt_isfinite(__d)) { // infinite numerator, finite denominator
+      __a = crt_copysign(crt_isinf(__a) ? 1.0 : 0.0, __a);
+      __b = crt_copysign(crt_isinf(__b) ? 1.0 : 0.0, __b);
+      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+    } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
+               crt_isfinite(__b)) { // finite numerator, __logbw is +infinity
+      __c = crt_copysign(crt_isinf(__c) ? 1.0 : 0.0, __c);
+      __d = crt_copysign(crt_isinf(__d) ? 1.0 : 0.0, __d);
+      COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+      COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
+    }
+  }
+  return z;
+}
diff --git a/wasmrt/llvm-builtins/builtins/divdf3.c b/wasmrt/llvm-builtins/builtins/divdf3.c
new file mode 100644
index 0000000..4c11759
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divdf3.c
@@ -0,0 +1,29 @@
+//===-- lib/divdf3.c - Double-precision division ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements double-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+// The two iteration counts below are presumably read by fp_div_impl.inc - TODO confirm.
+#define NUMBER_OF_HALF_ITERATIONS 3
+#define NUMBER_OF_FULL_ITERATIONS 1
+
+#include "fp_div_impl.inc"
+// Public entry point; __divXf3__ is not defined in this file (presumably provided by fp_div_impl.inc).
+COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ddiv(fp_t a, fp_t b) { return __divdf3(a, b); }
+#else
+COMPILER_RT_ALIAS(__divdf3, __aeabi_ddiv)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/divdi3.c b/wasmrt/llvm-builtins/builtins/divdi3.c
new file mode 100644
index 0000000..d71e138
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divdi3.c
@@ -0,0 +1,22 @@
+//===-- divdi3.c - Implement __divdi3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a / b
+// The macros below configure int_div_impl.inc, which presumably defines __divXi3 - TODO confirm.
+#define fixint_t di_int
+#define fixuint_t du_int
+#define COMPUTE_UDIV(a, b) __udivmoddi4((a), (b), (du_int *)0) // null remainder pointer: quotient only
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI di_int __divdi3(di_int a, di_int b) { return __divXi3(a, b); }
diff --git a/wasmrt/llvm-builtins/builtins/divmoddi4.c b/wasmrt/llvm-builtins/builtins/divmoddi4.c
new file mode 100644
index 0000000..e7cbbb1
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divmoddi4.c
@@ -0,0 +1,28 @@
+//===-- divmoddi4.c - Implement __divmoddi4 -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divmoddi4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a / b, *rem = a % b
+// Divides the magnitudes with __udivmoddi4, then reapplies the signs to quotient and remainder.
+COMPILER_RT_ABI di_int __divmoddi4(di_int a, di_int b, di_int *rem) {
+  const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+  di_int s_a = a >> bits_in_dword_m1;                    // s_a = a < 0 ? -1 : 0
+  di_int s_b = b >> bits_in_dword_m1;                    // s_b = b < 0 ? -1 : 0
+  a = (a ^ s_a) - s_a;                                   // negate if s_a == -1
+  b = (b ^ s_b) - s_b;                                   // negate if s_b == -1
+  s_b ^= s_a;                                            // sign of quotient
+  du_int r;
+  di_int q = (__udivmoddi4(a, b, &r) ^ s_b) - s_b;       // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                                // negate if s_a == -1
+  return q;
+}
diff --git a/wasmrt/llvm-builtins/builtins/divmodsi4.c b/wasmrt/llvm-builtins/builtins/divmodsi4.c
new file mode 100644
index 0000000..a85e299
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divmodsi4.c
@@ -0,0 +1,29 @@
+//===-- divmodsi4.c - Implement __divmodsi4 -------------------------------===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divmodsi4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a / b, *rem = a % b
+// Divides the magnitudes with __udivmodsi4, then reapplies the signs to quotient and remainder.
+COMPILER_RT_ABI si_int __divmodsi4(si_int a, si_int b, si_int *rem) {
+  const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1;
+  si_int s_a = a >> bits_in_word_m1;                    // s_a = a < 0 ? -1 : 0
+  si_int s_b = b >> bits_in_word_m1;                    // s_b = b < 0 ? -1 : 0
+  a = (a ^ s_a) - s_a;                                  // negate if s_a == -1
+  b = (b ^ s_b) - s_b;                                  // negate if s_b == -1
+  s_b ^= s_a;                                           // sign of quotient
+  su_int r;
+  si_int q = (__udivmodsi4(a, b, &r) ^ s_b) - s_b;      // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                               // negate if s_a == -1
+  return q;
+}
diff --git a/wasmrt/llvm-builtins/builtins/divmodti4.c b/wasmrt/llvm-builtins/builtins/divmodti4.c
new file mode 100644
index 0000000..b243ba4
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divmodti4.c
@@ -0,0 +1,32 @@
+//===-- divmodti4.c - Implement __divmodti4 -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divmodti4 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: a / b, *rem = a % b
+// Divides the magnitudes with __udivmodti4, then reapplies the signs to quotient and remainder.
+COMPILER_RT_ABI ti_int __divmodti4(ti_int a, ti_int b, ti_int *rem) {
+  const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+  ti_int s_a = a >> bits_in_tword_m1;                   // s_a = a < 0 ? -1 : 0
+  ti_int s_b = b >> bits_in_tword_m1;                   // s_b = b < 0 ? -1 : 0
+  a = (a ^ s_a) - s_a;                                  // negate if s_a == -1
+  b = (b ^ s_b) - s_b;                                  // negate if s_b == -1
+  s_b ^= s_a;                                           // sign of quotient
+  tu_int r;
+  ti_int q = (__udivmodti4(a, b, &r) ^ s_b) - s_b;      // negate if s_b == -1
+  *rem = (r ^ s_a) - s_a;                               // negate if s_a == -1
+  return q;
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/divsc3.c b/wasmrt/llvm-builtins/builtins/divsc3.c
new file mode 100644
index 0000000..aa4fd8e
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divsc3.c
@@ -0,0 +1,54 @@
+//===-- divsc3.c - Implement __divsc3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divsc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include "int_math.h"
+
+// Returns: the quotient of (a + ib) / (c + id)
+// c and d are rescaled by the exponent of max(|c|, |d|) first; NaN results get a recovery pass.
+COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) {
+  int __ilogbw = 0;
+  float __logbw =
+      __compiler_rt_logbf(__compiler_rt_fmaxf(crt_fabsf(__c), crt_fabsf(__d)));
+  if (crt_isfinite(__logbw)) { // rescale only when that exponent is finite
+    __ilogbw = (int)__logbw;
+    __c = __compiler_rt_scalbnf(__c, -__ilogbw);
+    __d = __compiler_rt_scalbnf(__d, -__ilogbw);
+  }
+  float __denom = __c * __c + __d * __d;
+  Fcomplex z;
+  COMPLEX_REAL(z) =
+      __compiler_rt_scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+  COMPLEX_IMAGINARY(z) =
+      __compiler_rt_scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { // both parts NaN: try the special cases
+    if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) { // zero denominator, numerator not all NaN
+      COMPLEX_REAL(z) = crt_copysignf(CRT_INFINITY, __c) * __a;
+      COMPLEX_IMAGINARY(z) = crt_copysignf(CRT_INFINITY, __c) * __b;
+    } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
+               crt_isfinite(__d)) { // infinite numerator, finite denominator
+      __a = crt_copysignf(crt_isinf(__a) ? 1 : 0, __a);
+      __b = crt_copysignf(crt_isinf(__b) ? 1 : 0, __b);
+      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+    } else if (crt_isinf(__logbw) && __logbw > 0 && crt_isfinite(__a) &&
+               crt_isfinite(__b)) { // finite numerator, __logbw is +infinity
+      __c = crt_copysignf(crt_isinf(__c) ? 1 : 0, __c);
+      __d = crt_copysignf(crt_isinf(__d) ? 1 : 0, __d);
+      COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d);
+      COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d);
+    }
+  }
+  return z;
+}
diff --git a/wasmrt/llvm-builtins/builtins/divsf3.c b/wasmrt/llvm-builtins/builtins/divsf3.c
new file mode 100644
index 0000000..5744c01
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divsf3.c
@@ -0,0 +1,30 @@
+//===-- lib/divsf3.c - Single-precision division ------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements single-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+// The three settings below are presumably read by fp_div_impl.inc - TODO confirm.
+#define NUMBER_OF_HALF_ITERATIONS 0
+#define NUMBER_OF_FULL_ITERATIONS 3
+#define USE_NATIVE_FULL_ITERATIONS
+
+#include "fp_div_impl.inc"
+// Public entry point; __divXf3__ is not defined in this file (presumably provided by fp_div_impl.inc).
+COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { return __divXf3__(a, b); }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_fdiv(fp_t a, fp_t b) { return __divsf3(a, b); }
+#else
+COMPILER_RT_ALIAS(__divsf3, __aeabi_fdiv)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/divsi3.c b/wasmrt/llvm-builtins/builtins/divsi3.c
new file mode 100644
index 0000000..f514407
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divsi3.c
@@ -0,0 +1,30 @@
+//===-- divsi3.c - Implement __divsi3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divsi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a / b
+// The macros below configure int_div_impl.inc, which presumably defines __divXi3 - TODO confirm.
+#define fixint_t si_int
+#define fixuint_t su_int
+// On CPUs without unsigned hardware division support,
+//  this calls __udivsi3 (notice the cast to su_int).
+// On CPUs with unsigned hardware division support,
+//  this uses the unsigned division instruction.
+#define COMPUTE_UDIV(a, b) ((su_int)(a) / (su_int)(b))
+#include "int_div_impl.inc"
+
+COMPILER_RT_ABI si_int __divsi3(si_int a, si_int b) { return __divXi3(a, b); }
+
+#if defined(__ARM_EABI__)
+COMPILER_RT_ALIAS(__divsi3, __aeabi_idiv)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/divtc3.c b/wasmrt/llvm-builtins/builtins/divtc3.c
new file mode 100644
index 0000000..0e47992
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divtc3.c
@@ -0,0 +1,55 @@
+//===-- divtc3.c - Implement __divtc3 -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divtc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+#include "int_math.h"
+
+// Returns: the quotient of (a + ib) / (c + id)
+// c and d are rescaled by the exponent of max(|c|, |d|) first; NaN results get a recovery pass.
+COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b,
+                                  long double __c, long double __d) {
+  int __ilogbw = 0;
+  long double __logbw =
+      __compiler_rt_logbl(__compiler_rt_fmaxl(crt_fabsl(__c), crt_fabsl(__d)));
+  if (crt_isfinite(__logbw)) { // rescale only when that exponent is finite
+    __ilogbw = (int)__logbw;
+    __c = __compiler_rt_scalbnl(__c, -__ilogbw);
+    __d = __compiler_rt_scalbnl(__d, -__ilogbw);
+  }
+  long double __denom = __c * __c + __d * __d;
+  Lcomplex z;
+  COMPLEX_REAL(z) =
+      __compiler_rt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw);
+  COMPLEX_IMAGINARY(z) =
+      __compiler_rt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw);
+  if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { // both parts NaN: try the special cases
+    if ((__denom == 0.0) && (!crt_isnan(__a) || !crt_isnan(__b))) { // zero denominator, numerator not all NaN
+      COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a;
+      COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b;
+    } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) &&
+               crt_isfinite(__d)) { // infinite numerator, finite denominator
+      __a = crt_copysignl(crt_isinf(__a) ? 1.0 : 0.0, __a);
+      __b = crt_copysignl(crt_isinf(__b) ? 1.0 : 0.0, __b);
+      COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d);
+      COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d);
+    } else if (crt_isinf(__logbw) && __logbw > 0.0 && crt_isfinite(__a) &&
+               crt_isfinite(__b)) { // finite numerator, __logbw is +infinity
+      __c = crt_copysignl(crt_isinf(__c) ? 1.0 : 0.0, __c);
+      __d = crt_copysignl(crt_isinf(__d) ? 1.0 : 0.0, __d);
+      COMPLEX_REAL(z) = 0.0 * (__a * __c + __b * __d);
+      COMPLEX_IMAGINARY(z) = 0.0 * (__b * __c - __a * __d);
+    }
+  }
+  return z;
+}
diff --git a/wasmrt/llvm-builtins/builtins/divtf3.c b/wasmrt/llvm-builtins/builtins/divtf3.c
new file mode 100644
index 0000000..bd76763
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/divtf3.c
@@ -0,0 +1,26 @@
+//===-- lib/divtf3.c - Quad-precision division --------------------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements quad-precision soft-float division
+// with the IEEE-754 default rounding (to nearest, ties to even).
+// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_TF_MODE) + +#define NUMBER_OF_HALF_ITERATIONS 4 +#define NUMBER_OF_FULL_ITERATIONS 1 + +#include "fp_div_impl.inc" + +COMPILER_RT_ABI fp_t __divtf3(fp_t a, fp_t b) { return __divXf3__(a, b); } + +#endif diff --git a/wasmrt/llvm-builtins/builtins/divti3.c b/wasmrt/llvm-builtins/builtins/divti3.c new file mode 100644 index 0000000..80f2130 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/divti3.c @@ -0,0 +1,26 @@ +//===-- divti3.c - Implement __divti3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: a / b + +#define fixint_t ti_int +#define fixuint_t tu_int +#define COMPUTE_UDIV(a, b) __udivmodti4((a), (b), (tu_int *)0) +#include "int_div_impl.inc" + +COMPILER_RT_ABI ti_int __divti3(ti_int a, ti_int b) { return __divXi3(a, b); } + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/divxc3.c b/wasmrt/llvm-builtins/builtins/divxc3.c new file mode 100644 index 0000000..97ffd2e --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/divxc3.c @@ -0,0 +1,55 @@ +//===-- divxc3.c - Implement __divxc3 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __divxc3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" +#include "int_math.h" + +// Returns: the quotient of (a + ib) / (c + id) + +COMPILER_RT_ABI Lcomplex __divxc3(long double __a, long double __b, + long double __c, long double __d) { + int __ilogbw = 0; + long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + if (crt_isfinite(__logbw)) { + __ilogbw = (int)__logbw; + __c = crt_scalbnl(__c, -__ilogbw); + __d = crt_scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + Lcomplex z; + COMPLEX_REAL(z) = crt_scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + COMPLEX_IMAGINARY(z) = + crt_scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (crt_isnan(COMPLEX_REAL(z)) && crt_isnan(COMPLEX_IMAGINARY(z))) { + if ((__denom == 0) && (!crt_isnan(__a) || !crt_isnan(__b))) { + COMPLEX_REAL(z) = crt_copysignl(CRT_INFINITY, __c) * __a; + COMPLEX_IMAGINARY(z) = crt_copysignl(CRT_INFINITY, __c) * __b; + } else if ((crt_isinf(__a) || crt_isinf(__b)) && crt_isfinite(__c) && + crt_isfinite(__d)) { + __a = crt_copysignl(crt_isinf(__a) ? 1 : 0, __a); + __b = crt_copysignl(crt_isinf(__b) ? 1 : 0, __b); + COMPLEX_REAL(z) = CRT_INFINITY * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = CRT_INFINITY * (__b * __c - __a * __d); + } else if (crt_isinf(__logbw) && __logbw > 0 && crt_isfinite(__a) && + crt_isfinite(__b)) { + __c = crt_copysignl(crt_isinf(__c) ? 1 : 0, __c); + __d = crt_copysignl(crt_isinf(__d) ? 
1 : 0, __d); + COMPLEX_REAL(z) = 0 * (__a * __c + __b * __d); + COMPLEX_IMAGINARY(z) = 0 * (__b * __c - __a * __d); + } + } + return z; +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/emutls.c b/wasmrt/llvm-builtins/builtins/emutls.c new file mode 100644 index 0000000..390ffb2 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/emutls.c @@ -0,0 +1,408 @@ +//===---------- emutls.c - Implements __emutls_get_address ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include + +#include "int_lib.h" + +#ifdef __BIONIC__ +// There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation +// to round 2. We need to delay deallocation because: +// - Android versions older than M lack __cxa_thread_atexit_impl, so apps +// use a pthread key destructor to call C++ destructors. +// - Apps might use __thread/thread_local variables in pthread destructors. +// We can't wait until the final two rounds, because jemalloc needs two rounds +// after the final malloc/free call to free its thread-specific data (see +// https://reviews.llvm.org/D46978#1107507). +#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1 +#else +#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0 +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +// MSVC raises a warning about a nonstandard extension being used for the 0 +// sized element in this array. Disable this for warn-as-error builds. 
+#pragma warning(push) +#pragma warning(disable : 4200) +#endif + +typedef struct emutls_address_array { + uintptr_t skip_destructor_rounds; + uintptr_t size; // number of elements in the 'data' array + void *data[]; +} emutls_address_array; + +#if defined(_MSC_VER) && !defined(__clang__) +#pragma warning(pop) +#endif + +static void emutls_shutdown(emutls_address_array *array); + +#ifndef _WIN32 + +#include + +static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_key_t emutls_pthread_key; +static bool emutls_key_created = false; + +typedef unsigned int gcc_word __attribute__((mode(word))); +typedef unsigned int gcc_pointer __attribute__((mode(pointer))); + +// Default is not to use posix_memalign, so systems like Android +// can use thread local data without heavier POSIX memory allocators. +#ifndef EMUTLS_USE_POSIX_MEMALIGN +#define EMUTLS_USE_POSIX_MEMALIGN 0 +#endif + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base; +#if EMUTLS_USE_POSIX_MEMALIGN + if (posix_memalign(&base, align, size) != 0) + abort(); +#else +#define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *)) + char *object; + if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) + abort(); + base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) & + ~(uintptr_t)(align - 1)); + + ((void **)base)[-1] = object; +#endif + return base; +} + +static __inline void emutls_memalign_free(void *base) { +#if EMUTLS_USE_POSIX_MEMALIGN + free(base); +#else + // The mallocated address is in ((void**)base)[-1] + free(((void **)base)[-1]); +#endif +} + +static __inline void emutls_setspecific(emutls_address_array *value) { + pthread_setspecific(emutls_pthread_key, (void *)value); +} + +static __inline emutls_address_array *emutls_getspecific(void) { + return (emutls_address_array *)pthread_getspecific(emutls_pthread_key); +} + +static void emutls_key_destructor(void *ptr) { + emutls_address_array *array = (emutls_address_array 
*)ptr; + if (array->skip_destructor_rounds > 0) { + // emutls is deallocated using a pthread key destructor. These + // destructors are called in several rounds to accommodate destructor + // functions that (re)initialize key values with pthread_setspecific. + // Delay the emutls deallocation to accommodate other end-of-thread + // cleanup tasks like calling thread_local destructors (e.g. the + // __cxa_thread_atexit fallback in libc++abi). + array->skip_destructor_rounds--; + emutls_setspecific(array); + } else { + emutls_shutdown(array); + free(ptr); + } +} + +static __inline void emutls_init(void) { + if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) + abort(); + emutls_key_created = true; +} + +static __inline void emutls_init_once(void) { + static pthread_once_t once = PTHREAD_ONCE_INIT; + pthread_once(&once, emutls_init); +} + +static __inline void emutls_lock(void) { pthread_mutex_lock(&emutls_mutex); } + +static __inline void emutls_unlock(void) { pthread_mutex_unlock(&emutls_mutex); } + +#else // _WIN32 + +#include +#include +#include +#include + +static LPCRITICAL_SECTION emutls_mutex; +static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; + +typedef uintptr_t gcc_word; +typedef void *gcc_pointer; + +static void win_error(DWORD last_err, const char *hint) { + char *buffer = NULL; + if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { + fprintf(stderr, "Windows error: %s\n", buffer); + } else { + fprintf(stderr, "Unknown Windows error: %s\n", hint); + } + LocalFree(buffer); +} + +static __inline void win_abort(DWORD last_err, const char *hint) { + win_error(last_err, hint); + abort(); +} + +static __inline void *emutls_memalign_alloc(size_t align, size_t size) { + void *base = _aligned_malloc(size, align); + if (!base) + win_abort(GetLastError(), "_aligned_malloc"); + return base; +} + +static __inline void 
emutls_memalign_free(void *base) { _aligned_free(base); } + +static void emutls_exit(void) { + if (emutls_mutex) { + DeleteCriticalSection(emutls_mutex); + _aligned_free(emutls_mutex); + emutls_mutex = NULL; + } + if (emutls_tls_index != TLS_OUT_OF_INDEXES) { + emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index)); + TlsFree(emutls_tls_index); + emutls_tls_index = TLS_OUT_OF_INDEXES; + } +} + +static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { + (void)p0; + (void)p1; + (void)p2; + emutls_mutex = + (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); + if (!emutls_mutex) { + win_error(GetLastError(), "_aligned_malloc"); + return FALSE; + } + InitializeCriticalSection(emutls_mutex); + + emutls_tls_index = TlsAlloc(); + if (emutls_tls_index == TLS_OUT_OF_INDEXES) { + emutls_exit(); + win_error(GetLastError(), "TlsAlloc"); + return FALSE; + } + atexit(&emutls_exit); + return TRUE; +} + +static __inline void emutls_init_once(void) { + static INIT_ONCE once; + InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); +} + +static __inline void emutls_lock(void) { EnterCriticalSection(emutls_mutex); } + +static __inline void emutls_unlock(void) { LeaveCriticalSection(emutls_mutex); } + +static __inline void emutls_setspecific(emutls_address_array *value) { + if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0) + win_abort(GetLastError(), "TlsSetValue"); +} + +static __inline emutls_address_array *emutls_getspecific(void) { + LPVOID value = TlsGetValue(emutls_tls_index); + if (value == NULL) { + const DWORD err = GetLastError(); + if (err != ERROR_SUCCESS) + win_abort(err, "TlsGetValue"); + } + return (emutls_address_array *)value; +} + +// Provide atomic load/store functions for emutls_get_index if built with MSVC. 
+#if !defined(__ATOMIC_RELEASE) +#include + +enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; + +static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { + assert(type == __ATOMIC_ACQUIRE); + // These return the previous value - but since we do an OR with 0, + // it's equivalent to a plain load. +#ifdef _WIN64 + return InterlockedOr64(ptr, 0); +#else + return InterlockedOr(ptr, 0); +#endif +} + +static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { + assert(type == __ATOMIC_RELEASE); + InterlockedExchangePointer((void *volatile *)ptr, (void *)val); +} + +#endif // __ATOMIC_RELEASE + +#endif // _WIN32 + +static size_t emutls_num_object = 0; // number of allocated TLS objects + +// Free the allocated TLS data +static void emutls_shutdown(emutls_address_array *array) { + if (array) { + uintptr_t i; + for (i = 0; i < array->size; ++i) { + if (array->data[i]) + emutls_memalign_free(array->data[i]); + } + } +} + +// For every TLS variable xyz, +// there is one __emutls_control variable named __emutls_v.xyz. +// If xyz has non-zero initial value, __emutls_v.xyz's "value" +// will point to __emutls_t.xyz, which has the initial value. +typedef struct __emutls_control { + // Must use gcc_word here, instead of size_t, to match GCC. When + // gcc_word is larger than size_t, the upper extra bits are all + // zeros. We can use variables of size_t to operate on size and + // align. + gcc_word size; // size of the object in bytes + gcc_word align; // alignment of the object in bytes + union { + uintptr_t index; // data[index-1] is the object address + void *address; // object address, when in single thread env + } object; + void *value; // null or non-zero initial value for the object +} __emutls_control; + +// Emulated TLS objects are always allocated at run-time. +static __inline void *emutls_allocate_object(__emutls_control *control) { + // Use standard C types, check with gcc's emutls.o. 
+ COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); + COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *)); + + size_t size = control->size; + size_t align = control->align; + void *base; + if (align < sizeof(void *)) + align = sizeof(void *); + // Make sure that align is power of 2. + if ((align & (align - 1)) != 0) + abort(); + + base = emutls_memalign_alloc(align, size); + if (control->value) + memcpy(base, control->value, size); + else + memset(base, 0, size); + return base; +} + +// Returns control->object.index; set index if not allocated yet. +static __inline uintptr_t emutls_get_index(__emutls_control *control) { + uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); + if (!index) { + emutls_init_once(); + emutls_lock(); + index = control->object.index; + if (!index) { + index = ++emutls_num_object; + __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); + } + emutls_unlock(); + } + return index; +} + +// Updates newly allocated thread local emutls_address_array. +static __inline void emutls_check_array_set_size(emutls_address_array *array, + uintptr_t size) { + if (array == NULL) + abort(); + array->size = size; + emutls_setspecific(array); +} + +// Returns the new 'data' array size, number of elements, +// which must be no smaller than the given index. +static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { + // Need to allocate emutls_address_array with extra slots + // to store the header. + // Round up the emutls_address_array size to multiple of 16. + uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *); + return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words; +} + +// Returns the size in bytes required for an emutls_address_array with +// N number of elements for data field. +static __inline uintptr_t emutls_asize(uintptr_t N) { + return N * sizeof(void *) + sizeof(emutls_address_array); +} + +// Returns the thread local emutls_address_array. 
+// Extends its size if necessary to hold address at index. +static __inline emutls_address_array * +emutls_get_address_array(uintptr_t index) { + emutls_address_array *array = emutls_getspecific(); + if (array == NULL) { + uintptr_t new_size = emutls_new_data_array_size(index); + array = (emutls_address_array *)malloc(emutls_asize(new_size)); + if (array) { + memset(array->data, 0, new_size * sizeof(void *)); + array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS; + } + emutls_check_array_set_size(array, new_size); + } else if (index > array->size) { + uintptr_t orig_size = array->size; + uintptr_t new_size = emutls_new_data_array_size(index); + array = (emutls_address_array *)realloc(array, emutls_asize(new_size)); + if (array) + memset(array->data + orig_size, 0, + (new_size - orig_size) * sizeof(void *)); + emutls_check_array_set_size(array, new_size); + } + return array; +} + +#ifndef _WIN32 +// Our emulated TLS implementation relies on local state (e.g. for the pthread +// key), and if we duplicate this state across different shared libraries, +// accesses to the same TLS variable from different shared libraries will yield +// different results (see https://github.com/android/ndk/issues/1551 for an +// example). __emutls_get_address is the only external entry point for emulated +// TLS, and by making it default visibility and weak, we can rely on the dynamic +// linker to coalesce multiple copies at runtime and ensure a single unique copy +// of TLS state. This is a best effort; it won't work if the user is linking +// with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows, +// where the dynamic linker has no notion of coalescing weak symbols at runtime. +// A more robust solution would be to create a separate shared library for +// emulated TLS, to ensure a single copy of its state. 
+__attribute__((visibility("default"), weak)) +#endif +void *__emutls_get_address(__emutls_control *control) { + uintptr_t index = emutls_get_index(control); + emutls_address_array *array = emutls_get_address_array(index--); + if (array->data[index] == NULL) + array->data[index] = emutls_allocate_object(control); + return array->data[index]; +} + +#ifdef __BIONIC__ +// Called by Bionic on dlclose to delete the emutls pthread key. +__attribute__((visibility("hidden"))) void __emutls_unregister_key(void) { + if (emutls_key_created) { + pthread_key_delete(emutls_pthread_key); + emutls_key_created = false; + } +} +#endif diff --git a/wasmrt/llvm-builtins/builtins/enable_execute_stack.c b/wasmrt/llvm-builtins/builtins/enable_execute_stack.c new file mode 100644 index 0000000..e18de4e --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/enable_execute_stack.c @@ -0,0 +1,67 @@ +//===-- enable_execute_stack.c - Implement __enable_execute_stack ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifndef _WIN32 +#include +#endif + +// #include "config.h" +// FIXME: CMake - include when cmake system is ready. +// Remove #define HAVE_SYSCONF 1 line. +#define HAVE_SYSCONF 1 + +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#else +#ifndef __APPLE__ +#include +#endif // __APPLE__ +#endif // _WIN32 + +#if __LP64__ +#define TRAMPOLINE_SIZE 48 +#else +#define TRAMPOLINE_SIZE 40 +#endif + +// The compiler generates calls to __enable_execute_stack() when creating +// trampoline functions on the stack for use with nested functions. +// It is expected to mark the page(s) containing the address +// and the next 48 bytes as executable. 
Since the stack is normally rw- +// that means changing the protection on those page(s) to rwx. + +COMPILER_RT_ABI void __enable_execute_stack(void *addr) { + +#if _WIN32 + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery(addr, &mbi, sizeof(mbi))) + return; // We should probably assert here because there is no return value + VirtualProtect(mbi.BaseAddress, mbi.RegionSize, PAGE_EXECUTE_READWRITE, + &mbi.Protect); +#else +#if __APPLE__ + // On Darwin, pagesize is always 4096 bytes + const uintptr_t pageSize = 4096; +#elif !defined(HAVE_SYSCONF) +#error "HAVE_SYSCONF not defined! See enable_execute_stack.c" +#else + const uintptr_t pageSize = sysconf(_SC_PAGESIZE); +#endif // __APPLE__ + + const uintptr_t pageAlignMask = ~(pageSize - 1); + uintptr_t p = (uintptr_t)addr; + unsigned char *startPage = (unsigned char *)(p & pageAlignMask); + unsigned char *endPage = + (unsigned char *)((p + TRAMPOLINE_SIZE + pageSize) & pageAlignMask); + size_t length = endPage - startPage; + (void)mprotect((void *)startPage, length, PROT_READ | PROT_WRITE | PROT_EXEC); +#endif +} diff --git a/wasmrt/llvm-builtins/builtins/eprintf.c b/wasmrt/llvm-builtins/builtins/eprintf.c new file mode 100644 index 0000000..daf90b4 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/eprintf.c @@ -0,0 +1,29 @@ +//===---------- eprintf.c - Implements __eprintf --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include + +// __eprintf() was used in an old version of . +// It can eventually go away, but it is needed when linking +// .o files built with the old . +// +// It should never be exported from a dylib, so it is marked +// visibility hidden. 
+#ifndef DONT_DEFINE_EPRINTF +#ifndef _WIN32 +__attribute__((visibility("hidden"))) +#endif +COMPILER_RT_ABI void +__eprintf(const char *format, const char *assertion_expression, + const char *line, const char *file) { + fprintf(stderr, format, assertion_expression, line, file); + fflush(stderr); + compilerrt_abort(); +} +#endif diff --git a/wasmrt/llvm-builtins/builtins/extenddftf2.c b/wasmrt/llvm-builtins/builtins/extenddftf2.c new file mode 100644 index 0000000..835076b --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/extenddftf2.c @@ -0,0 +1,21 @@ +//===-- lib/extenddftf2.c - double -> quad conversion -------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_TF_MODE) +#define SRC_DOUBLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI fp_t __extenddftf2(double a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/extendhfsf2.c b/wasmrt/llvm-builtins/builtins/extendhfsf2.c new file mode 100644 index 0000000..0159ab0 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/extendhfsf2.c @@ -0,0 +1,27 @@ +//===-- lib/extendhfsf2.c - half -> single conversion -------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_HALF +#define DST_SINGLE +#include "fp_extend_impl.inc" + +// Use a forwarding definition and noinline to implement a poor man's alias, +// as there isn't a good cross-platform way of defining one. 
+COMPILER_RT_ABI NOINLINE float __extendhfsf2(src_t a) { + return __extendXfYf2__(a); +} + +COMPILER_RT_ABI float __gnu_h2f_ieee(src_t a) { return __extendhfsf2(a); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI float __aeabi_h2f(src_t a) { return __extendhfsf2(a); } +#else +COMPILER_RT_ALIAS(__extendhfsf2, __aeabi_h2f) +#endif +#endif diff --git a/wasmrt/llvm-builtins/builtins/extendhftf2.c b/wasmrt/llvm-builtins/builtins/extendhftf2.c new file mode 100644 index 0000000..a2cb0f7 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/extendhftf2.c @@ -0,0 +1,22 @@ +//===-- lib/extendhftf2.c - half -> quad conversion ---------------*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_TF_MODE) && defined(COMPILER_RT_HAS_FLOAT16) +#define SRC_HALF +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI long double __extendhftf2(_Float16 a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/extendsfdf2.c b/wasmrt/llvm-builtins/builtins/extendsfdf2.c new file mode 100644 index 0000000..8132d57 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/extendsfdf2.c @@ -0,0 +1,21 @@ +//===-- lib/extendsfdf2.c - single -> double conversion -----------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define SRC_SINGLE +#define DST_DOUBLE +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI double __extendsfdf2(float a) { return __extendXfYf2__(a); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI double __aeabi_f2d(float a) { return __extendsfdf2(a); } +#else +COMPILER_RT_ALIAS(__extendsfdf2, __aeabi_f2d) +#endif +#endif diff --git a/wasmrt/llvm-builtins/builtins/extendsftf2.c b/wasmrt/llvm-builtins/builtins/extendsftf2.c new file mode 100644 index 0000000..0739859 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/extendsftf2.c @@ -0,0 +1,21 @@ +//===-- lib/extendsftf2.c - single -> quad conversion -------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" + +#if defined(CRT_HAS_TF_MODE) +#define SRC_SINGLE +#define DST_QUAD +#include "fp_extend_impl.inc" + +COMPILER_RT_ABI fp_t __extendsftf2(float a) { + return __extendXfYf2__(a); +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/ffsdi2.c b/wasmrt/llvm-builtins/builtins/ffsdi2.c new file mode 100644 index 0000000..beae553 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ffsdi2.c @@ -0,0 +1,27 @@ +//===-- ffsdi2.c - Implement __ffsdi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ffsdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the index of the least significant 1-bit in a, or +// the value zero if a is zero. The least significant bit is index one. + +COMPILER_RT_ABI int __ffsdi2(di_int a) { + dwords x; + x.all = a; + if (x.s.low == 0) { + if (x.s.high == 0) + return 0; + return ctzsi(x.s.high) + (1 + sizeof(si_int) * CHAR_BIT); + } + return ctzsi(x.s.low) + 1; +} diff --git a/wasmrt/llvm-builtins/builtins/ffssi2.c b/wasmrt/llvm-builtins/builtins/ffssi2.c new file mode 100644 index 0000000..ddb5292 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ffssi2.c @@ -0,0 +1,23 @@ +//===-- ffssi2.c - Implement __ffssi2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ffssi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the index of the least significant 1-bit in a, or +// the value zero if a is zero. The least significant bit is index one. 
+ +COMPILER_RT_ABI int __ffssi2(si_int a) { + if (a == 0) { + return 0; + } + return ctzsi(a) + 1; +} diff --git a/wasmrt/llvm-builtins/builtins/ffsti2.c b/wasmrt/llvm-builtins/builtins/ffsti2.c new file mode 100644 index 0000000..a2177d1 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/ffsti2.c @@ -0,0 +1,31 @@ +//===-- ffsti2.c - Implement __ffsti2 -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __ffsti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: the index of the least significant 1-bit in a, or +// the value zero if a is zero. The least significant bit is index one. + +COMPILER_RT_ABI int __ffsti2(ti_int a) { + twords x; + x.all = a; + if (x.s.low == 0) { + if (x.s.high == 0) + return 0; + return __builtin_ctzll(x.s.high) + (1 + sizeof(di_int) * CHAR_BIT); + } + return __builtin_ctzll(x.s.low) + 1; +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/fixdfdi.c b/wasmrt/llvm-builtins/builtins/fixdfdi.c new file mode 100644 index 0000000..a48facb --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fixdfdi.c @@ -0,0 +1,48 @@ +//===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" + +#ifndef __SOFTFP__ +// Support for systems that have hardware floating-point; can set the invalid +// flag as a side-effect of computation. + +COMPILER_RT_ABI du_int __fixunsdfdi(double a); + +COMPILER_RT_ABI di_int __fixdfdi(double a) { + if (a < 0.0) { + return -__fixunsdfdi(-a); + } + return __fixunsdfdi(a); +} + +#else +// Support for systems that don't have hardware floating-point; there are no +// flags to set, and we don't want to code-gen to an unknown soft-float +// implementation. + +typedef di_int fixint_t; +typedef du_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI di_int __fixdfdi(fp_t a) { return __fixint(a); } + +#endif + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI di_int __aeabi_d2lz(fp_t a) { return __fixdfdi(a); } +#else +COMPILER_RT_ALIAS(__fixdfdi, __aeabi_d2lz) +#endif +#endif + +#if defined(__MINGW32__) && defined(__arm__) +COMPILER_RT_ALIAS(__fixdfdi, __dtoi64) +#endif diff --git a/wasmrt/llvm-builtins/builtins/fixdfsi.c b/wasmrt/llvm-builtins/builtins/fixdfsi.c new file mode 100644 index 0000000..f546499 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fixdfsi.c @@ -0,0 +1,23 @@ +//===-- fixdfsi.c - Implement __fixdfsi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#define DOUBLE_PRECISION +#include "fp_lib.h" +typedef si_int fixint_t; +typedef su_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI si_int __fixdfsi(fp_t a) { return __fixint(a); } + +#if defined(__ARM_EABI__) +#if defined(COMPILER_RT_ARMHF_TARGET) +AEABI_RTABI si_int __aeabi_d2iz(fp_t a) { return __fixdfsi(a); } +#else +COMPILER_RT_ALIAS(__fixdfsi, __aeabi_d2iz) +#endif +#endif diff --git a/wasmrt/llvm-builtins/builtins/fixdfti.c b/wasmrt/llvm-builtins/builtins/fixdfti.c new file mode 100644 index 0000000..90ca895 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fixdfti.c @@ -0,0 +1,21 @@ +//===-- fixdfti.c - Implement __fixdfti -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT +#define DOUBLE_PRECISION +#include "fp_lib.h" + +typedef ti_int fixint_t; +typedef tu_int fixuint_t; +#include "fp_fixint_impl.inc" + +COMPILER_RT_ABI ti_int __fixdfti(fp_t a) { return __fixint(a); } + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/fixsfdi.c b/wasmrt/llvm-builtins/builtins/fixsfdi.c new file mode 100644 index 0000000..3a66fb9 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fixsfdi.c @@ -0,0 +1,48 @@ +//===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFTFP__
+// Support for systems that have hardware floating-point; can set the invalid
+// flag as a side-effect of computation.
+
+COMPILER_RT_ABI du_int __fixunssfdi(float a);
+
+COMPILER_RT_ABI di_int __fixsfdi(float a) {
+  if (a < 0.0f) {
+    return -__fixunssfdi(-a); // negative input: convert the magnitude, negate
+  }
+  return __fixunssfdi(a); // non-negative input: reuse the unsigned conversion
+}
+
+#else
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixsfdi(fp_t a) { return __fixint(a); }
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI di_int __aeabi_f2lz(fp_t a) { return __fixsfdi(a); }
+#else
+COMPILER_RT_ALIAS(__fixsfdi, __aeabi_f2lz)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixsfdi, __stoi64)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixsfsi.c b/wasmrt/llvm-builtins/builtins/fixsfsi.c
new file mode 100644
index 0000000..d83d7e7
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixsfsi.c
@@ -0,0 +1,23 @@
+//===-- fixsfsi.c - Implement __fixsfsi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef si_int fixint_t;  // result type; presumably read by the .inc below
+typedef su_int fixuint_t; // unsigned counterpart of the result type
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixsfsi(fp_t a) { return __fixint(a); }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI si_int __aeabi_f2iz(fp_t a) { return __fixsfsi(a); }
+#else
+COMPILER_RT_ALIAS(__fixsfsi, __aeabi_f2iz)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixsfti.c b/wasmrt/llvm-builtins/builtins/fixsfti.c
new file mode 100644
index 0000000..3c01b75
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixsfti.c
@@ -0,0 +1,21 @@
+//===-- fixsfti.c - Implement __fixsfti -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+typedef ti_int fixint_t;  // result type; presumably read by the .inc below
+typedef tu_int fixuint_t; // unsigned counterpart of the result type
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixsfti(fp_t a) { return __fixint(a); }
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/fixtfdi.c b/wasmrt/llvm-builtins/builtins/fixtfdi.c
new file mode 100644
index 0000000..d27a99b
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixtfdi.c
@@ -0,0 +1,18 @@
+//===-- fixtfdi.c - Implement __fixtfdi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) // whole definition is skipped when undefined
+typedef di_int fixint_t;
+typedef du_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI di_int __fixtfdi(fp_t a) { return __fixint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixtfsi.c b/wasmrt/llvm-builtins/builtins/fixtfsi.c
new file mode 100644
index 0000000..01e352a
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixtfsi.c
@@ -0,0 +1,18 @@
+//===-- fixtfsi.c - Implement __fixtfsi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) // whole definition is skipped when undefined
+typedef si_int fixint_t;
+typedef su_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI si_int __fixtfsi(fp_t a) { return __fixint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixtfti.c b/wasmrt/llvm-builtins/builtins/fixtfti.c
new file mode 100644
index 0000000..491fca5
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixtfti.c
@@ -0,0 +1,18 @@
+//===-- fixtfti.c - Implement __fixtfti -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) // whole definition is skipped when undefined
+typedef ti_int fixint_t;
+typedef tu_int fixuint_t;
+#include "fp_fixint_impl.inc"
+
+COMPILER_RT_ABI ti_int __fixtfti(fp_t a) { return __fixint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunsdfdi.c b/wasmrt/llvm-builtins/builtins/fixunsdfdi.c
new file mode 100644
index 0000000..f15f867
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunsdfdi.c
@@ -0,0 +1,46 @@
+//===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFTFP__
+// Support for systems that have hardware floating-point; can set the invalid
+// flag as a side-effect of computation.
+
+COMPILER_RT_ABI du_int __fixunsdfdi(double a) {
+  if (a <= 0.0) // zero and negative inputs all convert to 0
+    return 0;
+  su_int high = a / 4294967296.f; // upper 32 bits: a / 0x1p32f, truncated
+  su_int low = a - (double)high * 4294967296.f; // remainder: a - high * 0x1p32f
+  return ((du_int)high << 32) | low;
+}
+
+#else
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef du_int fixuint_t; // result type; presumably read by the .inc below
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunsdfdi(fp_t a) { return __fixuint(a); }
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI du_int __aeabi_d2ulz(fp_t a) { return __fixunsdfdi(a); }
+#else
+COMPILER_RT_ALIAS(__fixunsdfdi, __aeabi_d2ulz)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixunsdfdi, __dtou64)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunsdfsi.c b/wasmrt/llvm-builtins/builtins/fixunsdfsi.c
new file mode 100644
index 0000000..3db2ade
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunsdfsi.c
@@ -0,0 +1,22 @@
+//===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t; // result type; presumably read by the .inc below
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunsdfsi(fp_t a) { return __fixuint(a); }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI su_int __aeabi_d2uiz(fp_t a) { return __fixunsdfsi(a); }
+#else
+COMPILER_RT_ALIAS(__fixunsdfsi, __aeabi_d2uiz)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunsdfti.c b/wasmrt/llvm-builtins/builtins/fixunsdfti.c
new file mode 100644
index 0000000..be497d0
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunsdfti.c
@@ -0,0 +1,18 @@
+//===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+typedef tu_int fixuint_t; // result type; presumably read by the .inc below
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunsdfti(fp_t a) { return __fixuint(a); }
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/fixunssfdi.c b/wasmrt/llvm-builtins/builtins/fixunssfdi.c
new file mode 100644
index 0000000..e8f600d
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunssfdi.c
@@ -0,0 +1,47 @@
+//===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#ifndef __SOFTFP__
+// Support for systems that have hardware floating-point; can set the invalid
+// flag as a side-effect of computation.
+
+COMPILER_RT_ABI du_int __fixunssfdi(float a) {
+  if (a <= 0.0f) // zero and negative inputs all convert to 0
+    return 0;
+  double da = a; // widen once; the split below is carried out in double
+  su_int high = da / 4294967296.f; // upper 32 bits: da / 0x1p32f, truncated
+  su_int low = da - (double)high * 4294967296.f; // remainder: da - high * 0x1p32f
+  return ((du_int)high << 32) | low;
+}
+
+#else
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+typedef du_int fixuint_t; // result type; presumably read by the .inc below
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunssfdi(fp_t a) { return __fixuint(a); }
+
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI du_int __aeabi_f2ulz(fp_t a) { return __fixunssfdi(a); }
+#else
+COMPILER_RT_ALIAS(__fixunssfdi, __aeabi_f2ulz)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__fixunssfdi, __stou64)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunssfsi.c b/wasmrt/llvm-builtins/builtins/fixunssfsi.c
new file mode 100644
index 0000000..738c1bb
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunssfsi.c
@@ -0,0 +1,26 @@
+//===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunssfsi for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+typedef su_int fixuint_t; // result type; presumably read by the .inc below
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunssfsi(fp_t a) { return __fixuint(a); }
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI su_int __aeabi_f2uiz(fp_t a) { return __fixunssfsi(a); }
+#else
+COMPILER_RT_ALIAS(__fixunssfsi, __aeabi_f2uiz)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunssfti.c b/wasmrt/llvm-builtins/builtins/fixunssfti.c
new file mode 100644
index 0000000..5525d77
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunssfti.c
@@ -0,0 +1,21 @@
+//===-- fixunssfti.c - Implement __fixunssfti -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunssfti for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_128BIT) // whole definition is skipped when undefined
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunssfti(fp_t a) { return __fixuint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunstfdi.c b/wasmrt/llvm-builtins/builtins/fixunstfdi.c
new file mode 100644
index 0000000..febdb8f
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunstfdi.c
@@ -0,0 +1,17 @@
+//===-- fixunstfdi.c - Implement __fixunstfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) // whole definition is skipped when undefined
+typedef du_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI du_int __fixunstfdi(fp_t a) { return __fixuint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunstfsi.c b/wasmrt/llvm-builtins/builtins/fixunstfsi.c
new file mode 100644
index 0000000..4efc387
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunstfsi.c
@@ -0,0 +1,17 @@
+//===-- fixunstfsi.c - Implement __fixunstfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) // whole definition is skipped when undefined
+typedef su_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI su_int __fixunstfsi(fp_t a) { return __fixuint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunstfti.c b/wasmrt/llvm-builtins/builtins/fixunstfti.c
new file mode 100644
index 0000000..fa9e7aa
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunstfti.c
@@ -0,0 +1,17 @@
+//===-- fixunstfti.c - Implement __fixunstfti -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE) // whole definition is skipped when undefined
+typedef tu_int fixuint_t;
+#include "fp_fixuint_impl.inc"
+
+COMPILER_RT_ABI tu_int __fixunstfti(fp_t a) { return __fixuint(a); }
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/fixunsxfdi.c b/wasmrt/llvm-builtins/builtins/fixunsxfdi.c
new file mode 100644
index 0000000..c8a8061
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunsxfdi.c
@@ -0,0 +1,50 @@
+//===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunsxfdi for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+// Returns: convert a to an unsigned long long, rounding toward zero.
+//          Negative values all become zero.
+
+// Assumption: long double is an intel 80 bit floating point type padded with 6
+// bytes du_int is a 64 bit integral type value in long double is representable
+// in du_int or is negative (values of 2^64 and above saturate to all ones)
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about 'uninitialized variable use' here,
+// disable it for builds that warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4700)
+#endif
+
+COMPILER_RT_ABI du_int __fixunsxfdi(long double a) {
+  long_double_bits fb;
+  fb.f = a;
+  int e = (fb.u.high.s.low & 0x00007FFF) - 16383; // unbiased exponent
+  if (e < 0 || (fb.u.high.s.low & 0x00008000)) // |a| < 1, or sign bit set
+    return 0;
+  if ((unsigned)e >= sizeof(du_int) * CHAR_BIT) // e == 64 would shift by -1
+    return ~(du_int)0;
+  return fb.u.low.all >> (63 - e); // e is 0..63 here, so the shift is 63..0
+}
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
+
+#endif //!_ARCH_PPC
diff --git a/wasmrt/llvm-builtins/builtins/fixunsxfsi.c b/wasmrt/llvm-builtins/builtins/fixunsxfsi.c
new file mode 100644
index 0000000..154abcb
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunsxfsi.c
@@ -0,0 +1,50 @@
+//===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunsxfsi for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+// Returns: convert a to an unsigned int, rounding toward zero.
+//          Negative values all become zero.
+
+// Assumption: long double is an intel 80 bit floating point type padded with 6
+// bytes su_int is a 32 bit integral type value in long double is representable
+// in su_int or is negative (values of 2^32 and above saturate to all ones)
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about 'uninitialized variable use' here,
+// disable it for builds that warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4700)
+#endif
+
+COMPILER_RT_ABI su_int __fixunsxfsi(long double a) {
+  long_double_bits fb;
+  fb.f = a;
+  int e = (fb.u.high.s.low & 0x00007FFF) - 16383; // unbiased exponent
+  if (e < 0 || (fb.u.high.s.low & 0x00008000)) // |a| < 1, or sign bit set
+    return 0;
+  if ((unsigned)e >= sizeof(su_int) * CHAR_BIT) // e == 32 would shift by -1
+    return ~(su_int)0;
+  return fb.u.low.s.high >> (31 - e); // e is 0..31 here, so the shift is 31..0
+}
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
+
+#endif // !_ARCH_PPC
diff --git a/wasmrt/llvm-builtins/builtins/fixunsxfti.c b/wasmrt/llvm-builtins/builtins/fixunsxfti.c
new file mode 100644
index 0000000..508554e
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixunsxfti.c
@@ -0,0 +1,44 @@
+//===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixunsxfti for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: convert a to an unsigned 128 bit integer (tu_int), rounding toward
+//          zero. Negative values all become zero.
+
+// Assumption: long double is an intel 80 bit floating point type padded with 6
+// bytes tu_int is a 128 bit integral type value in long double is representable
+// in tu_int or is negative (values of 2^128 and above saturate to all ones)
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI tu_int __fixunsxfti(long double a) {
+  long_double_bits fb;
+  fb.f = a;
+  int e = (fb.u.high.s.low & 0x00007FFF) - 16383; // unbiased exponent
+  if (e < 0 || (fb.u.high.s.low & 0x00008000)) // |a| < 1, or sign bit set
+    return 0;
+  if ((unsigned)e >= sizeof(tu_int) * CHAR_BIT) // e == 128 no longer fits
+    return ~(tu_int)0;
+  tu_int r = fb.u.low.all; // 64 bit mantissa, leading 1 at bit 63
+  if (e > 63)
+    r <<= (e - 63); // at most 64 here, so the leading 1 stays inside tu_int
+  else
+    r >>= (63 - e);
+  return r;
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/fixxfdi.c b/wasmrt/llvm-builtins/builtins/fixxfdi.c
new file mode 100644
index 0000000..86cf376
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixxfdi.c
@@ -0,0 +1,54 @@
+//===-- fixxfdi.c - Implement __fixxfdi -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixxfdi for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+// Returns: convert a to a signed long long, rounding toward zero.
+
+// Assumption: long double is an intel 80 bit floating point type padded with 6
+// bytes di_int is a 64 bit integral type value in long double is representable
+// in di_int (no range checking performed)
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+#if defined(_MSC_VER) && !defined(__clang__)
+// MSVC throws a warning about 'uninitialized variable use' here,
+// disable it for builds that warn-as-error
+#pragma warning(push)
+#pragma warning(disable : 4700)
+#endif
+
+COMPILER_RT_ABI di_int __fixxfdi(long double a) {
+  const di_int di_max = (di_int)((~(du_int)0) / 2);
+  const di_int di_min = -di_max - 1;
+  long_double_bits fb;
+  fb.f = a;
+  int e = (fb.u.high.s.low & 0x00007FFF) - 16383; // unbiased exponent
+  if (e < 0) // |a| < 1 truncates to 0
+    return 0;
+  if ((unsigned)e >= sizeof(di_int) * CHAR_BIT) // saturate by the sign of a
+    return a > 0 ? di_max : di_min;
+  di_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); // 0 or -1
+  di_int r = fb.u.low.all;
+  r = (du_int)r >> (63 - e); // NOTE(review): e == 63 keeps bit 63 set; confirm
+  return (r ^ s) - s; // negates r when s == -1, leaves it alone when s == 0
+}
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma warning(pop)
+#endif
+
+#endif // !_ARCH_PPC
diff --git a/wasmrt/llvm-builtins/builtins/fixxfti.c b/wasmrt/llvm-builtins/builtins/fixxfti.c
new file mode 100644
index 0000000..90e0311
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/fixxfti.c
@@ -0,0 +1,46 @@
+//===-- fixxfti.c - Implement __fixxfti -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __fixxfti for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: convert a to a signed 128 bit integer (ti_int), rounding toward zero.
+
+// Assumption: long double is an intel 80 bit floating point type padded with 6
+// bytes ti_int is a 128 bit integral type value in long double is representable
+// in ti_int
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI ti_int __fixxfti(long double a) {
+  const ti_int ti_max = (ti_int)((~(tu_int)0) / 2);
+  const ti_int ti_min = -ti_max - 1;
+  long_double_bits fb;
+  fb.f = a;
+  int e = (fb.u.high.s.low & 0x00007FFF) - 16383; // unbiased exponent
+  if (e < 0) // |a| < 1 truncates to 0
+    return 0;
+  ti_int s = -(si_int)((fb.u.high.s.low & 0x00008000) >> 15); // 0 or -1
+  ti_int r = fb.u.low.all; // 64 bit mantissa, leading 1 at bit 63
+  if ((unsigned)e >= sizeof(ti_int) * CHAR_BIT) // saturate by the sign of a
+    return a > 0 ? ti_max : ti_min;
+  if (e > 63)
+    r <<= (e - 63);
+  else
+    r >>= (63 - e);
+  return (r ^ s) - s; // negates r when s == -1, leaves it alone when s == 0
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/floatdidf.c b/wasmrt/llvm-builtins/builtins/floatdidf.c
new file mode 100644
index 0000000..c994aad
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatdidf.c
@@ -0,0 +1,107 @@
+//===-- floatdidf.c - Implement __floatdidf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatdidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+//             di_int is a 64 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+#ifndef __SOFTFP__
+// Support for systems that have hardware floating-point; we'll set the inexact
+// flag as a side-effect of this computation.
+
+COMPILER_RT_ABI double __floatdidf(di_int a) {
+  static const double twop52 = 4503599627370496.0; // 0x1.0p52
+  static const double twop32 = 4294967296.0;       // 0x1.0p32
+
+  union {
+    int64_t x;
+    double d;
+  } low = {.d = twop52}; // start from the bit pattern of 2^52
+
+  const double high = (int32_t)(a >> 32) * twop32; // signed upper half * 2^32
+  low.x |= a & INT64_C(0x00000000ffffffff); // low 32 bits of a into low.d's bits
+
+  const double result = (high - twop52) + low.d; // subtract the 2^52 seed again
+  return result;
+}
+
+#else
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+COMPILER_RT_ABI double __floatdidf(di_int a) {
+  if (a == 0)
+    return 0.0;
+  const unsigned N = sizeof(di_int) * CHAR_BIT;
+  const di_int s = a >> (N - 1); // 0 or -1, assuming an arithmetic right shift
+  a = (du_int)(a ^ s) - s;       // magnitude of a, computed in unsigned arithmetic
+  int sd = N - __builtin_clzll(a); // number of significant digits
+  int e = sd - 1;                  // exponent
+  if (sd > DBL_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit DBL_MANT_DIG-1 bits to the right of 1
+    //  Q = bit DBL_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case DBL_MANT_DIG + 1:
+      a <<= 1;
+      break;
+    case DBL_MANT_DIG + 2:
+      break;
+    default:
+      a = ((du_int)a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to DBL_MANT_DIG bits
+  } else {
+    a <<= (DBL_MANT_DIG - sd);
+    // a is now rounded to DBL_MANT_DIG bits
+  }
+  double_bits fb;
+  fb.u.s.high = ((su_int)s & 0x80000000) |        // sign
+                ((su_int)(e + 1023) << 20) |      // exponent
+                ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high
+  fb.u.s.low = (su_int)a;                         // mantissa-low
+  return fb.f;
+}
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_l2d(di_int a) { return __floatdidf(a); }
+#else
+COMPILER_RT_ALIAS(__floatdidf, __aeabi_l2d)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatdidf, __i64tod)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatdisf.c b/wasmrt/llvm-builtins/builtins/floatdisf.c
new file mode 100644
index 0000000..0b62ed8
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatdisf.c
@@ -0,0 +1,79 @@
+//===-- floatdisf.c - Implement __floatdisf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatdisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is a IEEE 32 bit floating point type
+//             di_int is a 64 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI float __floatdisf(di_int a) {
+  if (a == 0)
+    return 0.0F;
+  const unsigned N = sizeof(di_int) * CHAR_BIT;
+  const di_int s = a >> (N - 1); // 0 or -1, assuming an arithmetic right shift
+  a = (du_int)(a ^ s) - s;       // magnitude of a, computed in unsigned arithmetic
+  int sd = N - __builtin_clzll(a); // number of significant digits
+  si_int e = sd - 1;               // exponent
+  if (sd > FLT_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit FLT_MANT_DIG-1 bits to the right of 1
+    //  Q = bit FLT_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case FLT_MANT_DIG + 1:
+      a <<= 1;
+      break;
+    case FLT_MANT_DIG + 2:
+      break;
+    default:
+      a = ((du_int)a >> (sd - (FLT_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0);
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits
+    if (a & ((du_int)1 << FLT_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to FLT_MANT_DIG bits
+  } else {
+    a <<= (FLT_MANT_DIG - sd);
+    // a is now rounded to FLT_MANT_DIG bits
+  }
+  float_bits fb;
+  fb.u = ((su_int)s & 0x80000000) | // sign
+         ((e + 127) << 23) |        // exponent
+         ((su_int)a & 0x007FFFFF);  // mantissa
+  return fb.f;
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_l2f(di_int a) { return __floatdisf(a); }
+#else
+COMPILER_RT_ALIAS(__floatdisf, __aeabi_l2f)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatdisf, __i64tos)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatditf.c b/wasmrt/llvm-builtins/builtins/floatditf.c new
file mode 100644
index 0000000..c6e326a
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatditf.c
@@ -0,0 +1,49 @@
+//===-- lib/floatditf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements di_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE)
+COMPILER_RT_ABI fp_t __floatditf(di_int a) {
+
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Handle zero as a special case to protect clz
+  if (a == 0)
+    return fromRep(0);
+
+  // All other cases begin by extracting the sign and absolute value of a
+  rep_t sign = 0;
+  du_int aAbs = (du_int)a;
+  if (a < 0) {
+    sign = signBit;
+    aAbs = ~(du_int)a + 1U; // two's-complement negate in unsigned arithmetic
+  }
+
+  // Exponent of (fp_t)a is the width of abs(a).
+  const int exponent = (aWidth - 1) - __builtin_clzll(aAbs);
+  rep_t result;
+
+  // Shift a into the significand field, rounding if it is a right-shift
+  const int shift = significandBits - exponent;
+  result = (rep_t)aAbs << shift ^ implicitBit; // presumably clears the leading 1
+
+  // Insert the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // Insert the sign bit and return
+  return fromRep(result | sign);
+}
+
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatdixf.c b/wasmrt/llvm-builtins/builtins/floatdixf.c
new file mode 100644
index 0000000..ad5deb2
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatdixf.c
@@ -0,0 +1,41 @@
+//===-- floatdixf.c - Implement __floatdixf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatdixf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+// Returns: convert a to a long double, rounding toward even.
+
+// Assumption: long double is an IEEE 80 bit floating point type padded to 128
+// bits; di_int is a 64 bit integral type
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI long double __floatdixf(di_int a) {
+  if (a == 0)
+    return 0.0; // also keeps zero away from clz below
+  const unsigned N = sizeof(di_int) * CHAR_BIT;
+  const di_int s = a >> (N - 1); // sign of a replicated by the shift
+  const du_int aAbs = ((du_int)a ^ (du_int)s) - (du_int)s; // |a|, no overflow
+  int clz = __builtin_clzll(aAbs); // leading zeros of the magnitude
+  int e = (N - 1) - clz;           // exponent
+  long_double_bits fb;
+  fb.u.high.s.low = ((su_int)s & 0x00008000) | // sign
+                    (e + 16383);               // exponent
+  fb.u.low.all = aAbs << clz; // mantissa, normalised with an unsigned shift
+  return fb.f;
+}
+
+#endif // !_ARCH_PPC
diff --git a/wasmrt/llvm-builtins/builtins/floatsidf.c b/wasmrt/llvm-builtins/builtins/floatsidf.c
new file mode 100644
index 0000000..28cf32f
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatsidf.c
@@ -0,0 +1,57 @@
+//===-- lib/floatsidf.c - integer -> double-precision conversion --*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsidf(si_int a) {
+
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Handle zero as a special case to protect clz
+  if (a == 0)
+    return fromRep(0);
+
+  // All other cases begin by extracting the sign and absolute value of a
+  rep_t sign = 0;
+  su_int aAbs = (su_int)a; // magnitude, tracked unsigned from here on
+  if (a < 0) {
+    sign = signBit;
+    aAbs = -aAbs; // unsigned negation wraps, so INT_MIN is well defined
+  }
+
+  // Exponent of (fp_t)a is the width of abs(a).
+  const int exponent = (aWidth - 1) - clzsi(aAbs);
+  rep_t result;
+
+  // Shift the magnitude into the significand field and clear the implicit
+  // bit.
+  const int shift = significandBits - exponent;
+  result = (rep_t)aAbs << shift ^ implicitBit;
+
+  // Insert the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // Insert the sign bit and return
+  return fromRep(result | sign);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_i2d(si_int a) { return __floatsidf(a); }
+#else
+COMPILER_RT_ALIAS(__floatsidf, __aeabi_i2d)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatsisf.c b/wasmrt/llvm-builtins/builtins/floatsisf.c
new file mode 100644
index 0000000..c01f81e
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatsisf.c
@@ -0,0 +1,65 @@
+//===-- lib/floatsisf.c - integer -> single-precision conversion --*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatsisf(si_int a) {
+  const int aWidth = sizeof a * CHAR_BIT;
+
+  // Handle zero as a special case to protect clz
+  if (a == 0)
+    return fromRep(0);
+
+  // All other cases begin by extracting the sign and absolute value of a
+  rep_t sign = 0;
+  su_int aAbs = (su_int)a; // magnitude, tracked unsigned from here on
+  if (a < 0) {
+    sign = signBit;
+    aAbs = -aAbs; // unsigned negation wraps, so INT_MIN is well defined
+  }
+
+  // Exponent of (fp_t)a is the width of abs(a).
+  const int exponent = (aWidth - 1) - clzsi(aAbs);
+  rep_t result;
+
+  // Shift the magnitude into the significand field, rounding (to nearest,
+  // ties to even) if it is a right-shift
+  if (exponent <= significandBits) {
+    const int shift = significandBits - exponent;
+    result = (rep_t)aAbs << shift ^ implicitBit;
+  } else {
+    const int shift = exponent - significandBits;
+    result = (rep_t)aAbs >> shift ^ implicitBit;
+    rep_t round = (rep_t)aAbs << (typeWidth - shift); // the discarded bits
+    if (round > signBit)
+      result++;
+    if (round == signBit)
+      result += result & 1;
+  }
+
+  // Insert the exponent
+  result += (rep_t)(exponent + exponentBias) << significandBits;
+  // Insert the sign bit and return
+  return fromRep(result | sign);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_i2f(int a) { return __floatsisf(a); }
+#else
+COMPILER_RT_ALIAS(__floatsisf, __aeabi_i2f)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatsitf.c b/wasmrt/llvm-builtins/builtins/floatsitf.c
new file mode 100644
index 0000000..4d5b52f
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatsitf.c
@@ -0,0 +1,49 @@
+//===-- lib/floatsitf.c - integer -> quad-precision conversion ----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE)
+// Converts the signed integer a to fp_t. No rounding step is performed: the
+// magnitude is only ever shifted left into the significand field.
+COMPILER_RT_ABI fp_t __floatsitf(si_int a) {
+  // Zero has no leading one bit, so it must not reach the clz call below.
+  if (a == 0)
+    return fromRep(0);
+
+  // Separate the sign from the magnitude. The magnitude is formed in unsigned
+  // arithmetic (two's complement negation), so the most negative input is
+  // handled without signed overflow.
+  const rep_t signField = (a < 0) ? signBit : 0;
+  const su_int magnitude = (a < 0) ? ~(su_int)a + (su_int)1U : (su_int)a;
+
+  // Index of the highest set bit; this is the unbiased binary exponent.
+  const int bitWidth = sizeof a * CHAR_BIT;
+  const int msbIndex = (bitWidth - 1) - clzsi(magnitude);
+
+  // Align the leading one with the implicit-bit position and then clear it,
+  // leaving only the fraction bits in the significand field.
+  // NOTE(review): assumes significandBits >= msbIndex for quad precision.
+  rep_t bits = (rep_t)magnitude << (significandBits - msbIndex);
+  bits ^= implicitBit;
+
+  // Add the biased exponent above the significand field.
+  bits += (rep_t)(msbIndex + exponentBias) << significandBits;
+
+  // Merge in the sign and reinterpret the bit pattern as fp_t.
+  return fromRep(bits | signField);
+}
+
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floattidf.c b/wasmrt/llvm-builtins/builtins/floattidf.c
new file mode 100644
index 0000000..7bfe87f
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floattidf.c
@@ -0,0 +1,73 @@
+//===-- floattidf.c - Implement __floattidf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floattidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is an IEEE 64 bit floating point type
+//             ti_int is a 128 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+COMPILER_RT_ABI double __floattidf(ti_int a) {
+  if (a == 0)
+    return 0.0;
+  const unsigned N = sizeof(ti_int) * CHAR_BIT;
+  const ti_int s = a >> (N - 1); // sign of a replicated by the shift
+  a = (tu_int)(a ^ s) - s;       // |a|; unsigned so the minimum cannot overflow
+  int sd = N - __clzti2(a);      // number of significant digits
+  si_int e = sd - 1;             // exponent
+  if (sd > DBL_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit DBL_MANT_DIG-1 bits to the right of 1
+    //  Q = bit DBL_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case DBL_MANT_DIG + 1:
+      a <<= 1;
+      break;
+    case DBL_MANT_DIG + 2:
+      break;
+    default:
+      a = ((tu_int)a >> (sd - (DBL_MANT_DIG + 2))) |
+          ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0);
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits
+    if (a & ((tu_int)1 << DBL_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to DBL_MANT_DIG bits
+  } else {
+    a <<= (DBL_MANT_DIG - sd);
+    // a now has DBL_MANT_DIG bits; nothing was dropped, so no rounding
+  }
+  double_bits fb;
+  fb.u.s.high = ((su_int)s & 0x80000000) |        // sign
+                ((e + 1023) << 20) |              // exponent
+                ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high
+  fb.u.s.low = (su_int)a;                         // mantissa-low
+  return fb.f;
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/floattisf.c b/wasmrt/llvm-builtins/builtins/floattisf.c
new file
mode 100644
index 0000000..717cb36
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floattisf.c
@@ -0,0 +1,71 @@
+//===-- floattisf.c - Implement __floattisf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floattisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is an IEEE 32 bit floating point type
+//             ti_int is a 128 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+COMPILER_RT_ABI float __floattisf(ti_int a) {
+  if (a == 0)
+    return 0.0F;
+  const unsigned N = sizeof(ti_int) * CHAR_BIT;
+  const ti_int s = a >> (N - 1); // sign of a replicated by the shift
+  a = (tu_int)(a ^ s) - s;       // |a|; unsigned so the minimum cannot overflow
+  int sd = N - __clzti2(a);      // number of significant digits
+  si_int e = sd - 1;             // exponent
+  if (sd > FLT_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit FLT_MANT_DIG-1 bits to the right of 1
+    //  Q = bit FLT_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case FLT_MANT_DIG + 1:
+      a <<= 1;
+      break;
+    case FLT_MANT_DIG + 2:
+      break;
+    default:
+      a = ((tu_int)a >> (sd - (FLT_MANT_DIG + 2))) |
+          ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0);
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits
+    if (a & ((tu_int)1 << FLT_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to FLT_MANT_DIG bits
+  } else {
+    a <<= (FLT_MANT_DIG - sd);
+    // a now has FLT_MANT_DIG bits; nothing was dropped, so no rounding
+  }
+  float_bits fb;
+  fb.u = ((su_int)s & 0x80000000) | // sign
+         ((e + 127) << 23) |        // exponent
+         ((su_int)a & 0x007FFFFF);  // mantissa
+  return fb.f;
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/floattitf.c b/wasmrt/llvm-builtins/builtins/floattitf.c
new file mode 100644
index 0000000..fff0755
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floattitf.c
@@ -0,0 +1,78 @@
+//===-- lib/floattitf.c - int128 -> quad-precision conversion -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ti_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+#include "int_lib.h"
+
+// Returns: convert a ti_int to a fp_t, rounding toward even.
+
+// Assumption: fp_t is an IEEE 128 bit floating point type
+//             ti_int is a 128 bit integral type
+
+// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+#if defined(CRT_HAS_TF_MODE)
+COMPILER_RT_ABI fp_t __floattitf(ti_int a) {
+  if (a == 0)
+    return 0.0;
+  const unsigned N = sizeof(ti_int) * CHAR_BIT;
+  const ti_int s = a >> (N - 1); // sign of a replicated by the shift
+  a = (tu_int)(a ^ s) - s;       // |a|; unsigned so the minimum cannot overflow
+  int sd = N - __clzti2(a);      // number of significant digits
+  int e = sd - 1;                // exponent
+  if (sd > TF_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit TF_MANT_DIG-1 bits to the right of 1
+    //  Q = bit TF_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case TF_MANT_DIG + 1:
+      a <<= 1;
+      break;
+    case TF_MANT_DIG + 2:
+      break;
+    default:
+      a = ((tu_int)a >> (sd - (TF_MANT_DIG + 2))) |
+          ((a & ((tu_int)(-1) >> ((N + TF_MANT_DIG + 2) - sd))) != 0);
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to TF_MANT_DIG or TF_MANT_DIG+1 bits
+    if (a & ((tu_int)1 << TF_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to TF_MANT_DIG bits
+  } else {
+    a <<= (TF_MANT_DIG - sd);
+    // a now has TF_MANT_DIG bits; nothing was dropped, so no rounding
+  }
+
+  long_double_bits fb;
+  fb.u.high.all = (s & 0x8000000000000000LL)            // sign
+                  | (du_int)(e + 16383) << 48           // exponent
+                  | ((a >> 64) & 0x0000ffffffffffffLL); // significand
+  fb.u.low.all = (du_int)(a);
+  return fb.f;
+}
+
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floattixf.c b/wasmrt/llvm-builtins/builtins/floattixf.c
new file mode 100644
index 0000000..23796f1
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floattixf.c
@@
-0,0 +1,73 @@
+//===-- floattixf.c - Implement __floattixf -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floattixf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: convert a to a long double, rounding toward even.
+
+// Assumption: long double is an IEEE 80 bit floating point type padded to 128
+// bits; ti_int is a 128 bit integral type
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+
+COMPILER_RT_ABI long double __floattixf(ti_int a) {
+  if (a == 0)
+    return 0.0;
+  const unsigned N = sizeof(ti_int) * CHAR_BIT;
+  const ti_int s = a >> (N - 1); // sign of a replicated by the shift
+  a = (tu_int)(a ^ s) - s;       // |a|; unsigned so the minimum cannot overflow
+  int sd = N - __clzti2(a);      // number of significant digits
+  int e = sd - 1;                // exponent
+  if (sd > LDBL_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = msb 1 bit
+    //  P = bit LDBL_MANT_DIG-1 bits to the right of 1
+    //  Q = bit LDBL_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    switch (sd) {
+    case LDBL_MANT_DIG + 1:
+      a <<= 1;
+      break;
+    case LDBL_MANT_DIG + 2:
+      break;
+    default:
+      a = ((tu_int)a >> (sd - (LDBL_MANT_DIG + 2))) |
+          ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG + 2) - sd))) != 0);
+    };
+    // finish:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may add a significant bit
+    a >>= 2;           // dump Q and R
+    // a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits
+    if (a & ((tu_int)1 << LDBL_MANT_DIG)) {
+      a >>= 1;
+      ++e;
+    }
+    // a is now rounded to LDBL_MANT_DIG bits
+  } else {
+    a <<= (LDBL_MANT_DIG - sd);
+    // a now has LDBL_MANT_DIG bits; nothing was dropped, so no rounding
+  }
+  long_double_bits fb;
+  fb.u.high.s.low = ((su_int)s & 0x8000) | // sign
+                    (e + 16383);           // exponent
+  fb.u.low.all = (du_int)a;                // mantissa
+  return fb.f;
+}
+
+#endif // CRT_HAS_128BIT
diff --git a/wasmrt/llvm-builtins/builtins/floatundidf.c b/wasmrt/llvm-builtins/builtins/floatundidf.c
new file mode 100644
index 0000000..2ec802c
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatundidf.c
@@ -0,0 +1,110 @@
+//===-- floatundidf.c - Implement __floatundidf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+// Returns: convert a to a double, rounding toward even.
+
+// Assumption: double is a IEEE 64 bit floating point type
+//            du_int is a 64 bit integral type
+
+// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm
+// mmmm
+
+#include "int_lib.h"
+
+#ifndef __SOFTFP__
+// Support for systems that have hardware floating-point; we'll set the inexact
+// flag as a side-effect of this computation.
+
+COMPILER_RT_ABI double __floatundidf(du_int a) {
+  static const double two52 = 4503599627370496.0;           // 0x1.0p52
+  static const double two84 = 19342813113834066795298816.0; // 0x1.0p84
+  static const double two84AndTwo52 =
+      19342813118337666422669312.0; // 0x1.00000001p84
+
+  // Two scratch values that expose the bit pattern of a double. They start
+  // out as 2^84 and 2^52 respectively.
+  union {
+    uint64_t bits;
+    double value;
+  } upper = {.value = two84}, lower = {.value = two52};
+
+  // OR the high and low 32-bit halves of a into the two bit patterns.
+  upper.bits |= a >> 32;
+  lower.bits |= a & UINT64_C(0x00000000ffffffff);
+
+  // Take the 2^84 and 2^52 offsets back out and add the two halves.
+  const double sum = (upper.value - two84AndTwo52) + lower.value;
+  return sum;
+}
+
+#else
+// Support for systems that don't have hardware floating-point; there are no
+// flags to set, and we don't want to code-gen to an unknown soft-float
+// implementation.
+
+COMPILER_RT_ABI double __floatundidf(du_int a) {
+  if (a == 0)
+    return 0.0;
+  const unsigned nbits = sizeof(du_int) * CHAR_BIT;
+  int width = nbits - __builtin_clzll(a); // number of significant bits
+  int expo = width - 1;                   // unbiased exponent
+  if (width > DBL_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = most significant set bit
+    //  P = bit DBL_MANT_DIG-1 bits to the right of 1
+    //  Q = bit DBL_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    if (width == DBL_MANT_DIG + 1) {
+      // One bit beyond the mantissa: it becomes Q and R stays clear.
+      a <<= 1;
+    } else if (width > DBL_MANT_DIG + 2) {
+      // Drop the surplus low bits, recording in R (bit 0) whether any of
+      // them was set.
+      a = (a >> (width - (DBL_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((nbits + DBL_MANT_DIG + 2) - width))) != 0);
+    }
+    // width == DBL_MANT_DIG + 2 already has the 1...PQR layout.
+    // Round to nearest, ties to even:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may carry into a new top bit
+    a >>= 2;           // drop Q and R
+    // a now has DBL_MANT_DIG or DBL_MANT_DIG+1 significant bits
+    if (a & ((du_int)1 << DBL_MANT_DIG)) {
+      a >>= 1;
+      ++expo;
+    }
+    // a now has exactly DBL_MANT_DIG significant bits
+  } else {
+    a <<= (DBL_MANT_DIG - width);
+    // a now has exactly DBL_MANT_DIG significant bits (nothing was dropped)
+  }
+  double_bits rep;
+  rep.u.s.high = ((su_int)(expo + 1023) << 20) |    // exponent
+                 ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high
+  rep.u.s.low = (su_int)a;                         // mantissa-low
+  return rep.f;
+}
+#endif
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI double __aeabi_ul2d(du_int a) { return __floatundidf(a); }
+#else
+COMPILER_RT_ALIAS(__floatundidf, __aeabi_ul2d)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatundidf, __u64tod)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatundisf.c b/wasmrt/llvm-builtins/builtins/floatundisf.c
new file mode 100644
index 0000000..2a4157d
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatundisf.c
@@ -0,0 +1,76 @@
+//===-- floatundisf.c - Implement __floatundisf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundisf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+// Returns: convert a to a float, rounding toward even.
+
+// Assumption: float is an IEEE 32 bit floating point type
+//             du_int is a 64 bit integral type
+
+// seee eeee emmm mmmm mmmm mmmm mmmm mmmm
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI float __floatundisf(du_int a) {
+  if (a == 0)
+    return 0.0F;
+  const unsigned nbits = sizeof(du_int) * CHAR_BIT;
+  int width = nbits - __builtin_clzll(a); // number of significant bits
+  si_int expo = width - 1;                // unbiased exponent
+  if (width > FLT_MANT_DIG) {
+    //  start:  0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx
+    //  finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR
+    //                                                12345678901234567890123456
+    //  1 = most significant set bit
+    //  P = bit FLT_MANT_DIG-1 bits to the right of 1
+    //  Q = bit FLT_MANT_DIG bits to the right of 1
+    //  R = "or" of all bits to the right of Q
+    if (width == FLT_MANT_DIG + 1) {
+      // One bit beyond the mantissa: it becomes Q and R stays clear.
+      a <<= 1;
+    } else if (width > FLT_MANT_DIG + 2) {
+      // Drop the surplus low bits, recording in R (bit 0) whether any of
+      // them was set.
+      a = (a >> (width - (FLT_MANT_DIG + 2))) |
+          ((a & ((du_int)(-1) >> ((nbits + FLT_MANT_DIG + 2) - width))) != 0);
+    }
+    // width == FLT_MANT_DIG + 2 already has the 1...PQR layout.
+    // Round to nearest, ties to even:
+    a |= (a & 4) != 0; // Or P into R
+    ++a;               // round - this step may carry into a new top bit
+    a >>= 2;           // drop Q and R
+    // a now has FLT_MANT_DIG or FLT_MANT_DIG+1 significant bits
+    if (a & ((du_int)1 << FLT_MANT_DIG)) {
+      a >>= 1;
+      ++expo;
+    }
+    // a now has exactly FLT_MANT_DIG significant bits
+  } else {
+    a <<= (FLT_MANT_DIG - width);
+    // a now has exactly FLT_MANT_DIG significant bits (nothing was dropped)
+  }
+  float_bits rep;
+  rep.u = ((expo + 127) << 23) |     // exponent
+          ((su_int)a & 0x007FFFFF); // mantissa
+  return rep.f;
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI float __aeabi_ul2f(du_int a) { return __floatundisf(a); }
+#else
+COMPILER_RT_ALIAS(__floatundisf, __aeabi_ul2f)
+#endif
+#endif
+
+#if defined(__MINGW32__) && defined(__arm__)
+COMPILER_RT_ALIAS(__floatundisf, __u64tos)
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatunditf.c b/wasmrt/llvm-builtins/builtins/floatunditf.c
new file mode 100644
index 0000000..abe0ca9
--- /dev/null
+++
b/wasmrt/llvm-builtins/builtins/floatunditf.c
@@ -0,0 +1,40 @@
+//===-- lib/floatunditf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements du_int to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE)
+COMPILER_RT_ABI fp_t __floatunditf(du_int a) {
+  // Zero has no leading one bit, so it must not reach the clz call below.
+  if (a == 0)
+    return fromRep(0);
+
+  // Index of the highest set bit; this is the unbiased binary exponent.
+  const int bitWidth = sizeof a * CHAR_BIT;
+  const int msbIndex = (bitWidth - 1) - __builtin_clzll(a);
+
+  // Align the leading one with the implicit-bit position and then clear it,
+  // leaving only the fraction bits in the significand field.
+  // NOTE(review): assumes significandBits >= msbIndex for quad precision.
+  rep_t bits = (rep_t)a << (significandBits - msbIndex);
+  bits ^= implicitBit;
+
+  // Add the biased exponent above the significand field and reinterpret the
+  // resulting bit pattern as fp_t.
+  bits += (rep_t)(msbIndex + exponentBias) << significandBits;
+  return fromRep(bits);
+}
+
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatundixf.c b/wasmrt/llvm-builtins/builtins/floatundixf.c
new file mode 100644
index 0000000..85264ad
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatundixf.c
@@ -0,0 +1,37 @@
+//===-- floatundixf.c - Implement __floatundixf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatundixf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+
+// Returns: convert a to a long double, rounding toward even.
+
+// Assumption: long double is an IEEE 80 bit floating point type padded to 128
+// bits; du_int is a 64 bit integral type
+
+// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee
+// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm
+// mmmm mmmm mmmm
+COMPILER_RT_ABI long double __floatundixf(du_int a) {
+  if (a == 0)
+    return 0.0;
+  const unsigned nbits = sizeof(du_int) * CHAR_BIT;
+  const int lead = __builtin_clzll(a);   // leading zero bits of a
+  const int expo = (nbits - 1) - lead;   // unbiased exponent
+  long_double_bits rep;
+  rep.u.high.s.low = (expo + 16383); // biased exponent
+  rep.u.low.all = a << lead;         // mantissa with its leading 1 at the top
+  return rep.f;
+}
+
+#endif // !_ARCH_PPC
diff --git a/wasmrt/llvm-builtins/builtins/floatunsidf.c b/wasmrt/llvm-builtins/builtins/floatunsidf.c
new file mode 100644
index 0000000..9b3e5fe
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatunsidf.c
@@ -0,0 +1,47 @@
+//===-- lib/floatunsidf.c - uint -> double-precision conversion ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to double-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsidf(su_int a) {
+  // Zero has no leading one bit, so it must not reach the clz call below.
+  if (a == 0)
+    return fromRep(0);
+
+  // Index of the highest set bit; this is the unbiased binary exponent.
+  const int bitWidth = sizeof a * CHAR_BIT;
+  const int msbIndex = (bitWidth - 1) - clzsi(a);
+
+  // Align the leading one with the implicit-bit position and then clear it,
+  // leaving only the fraction bits in the significand field.
+  // NOTE(review): assumes significandBits >= msbIndex for double precision.
+  rep_t bits = (rep_t)a << (significandBits - msbIndex);
+  bits ^= implicitBit;
+
+  // Add the biased exponent above the significand field and reinterpret the
+  // resulting bit pattern as fp_t.
+  bits += (rep_t)(msbIndex + exponentBias) << significandBits;
+  return fromRep(bits);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ui2d(su_int a) { return __floatunsidf(a); }
+#else
+COMPILER_RT_ALIAS(__floatunsidf, __aeabi_ui2d)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatunsisf.c b/wasmrt/llvm-builtins/builtins/floatunsisf.c
new file mode 100644
index 0000000..ec062b5
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatunsisf.c
@@ -0,0 +1,57 @@
+//===-- lib/floatunsisf.c - uint -> single-precision conversion ---*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to single-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+#include "int_lib.h"
+
+COMPILER_RT_ABI fp_t __floatunsisf(su_int a) {
+  // Zero has no leading one bit, so it must not reach the clz call below.
+  if (a == 0)
+    return fromRep(0);
+
+  // Index of the highest set bit; this is the unbiased binary exponent.
+  const int bitWidth = sizeof a * CHAR_BIT;
+  const int msbIndex = (bitWidth - 1) - clzsi(a);
+
+  rep_t bits;
+  if (msbIndex > significandBits) {
+    // Too many bits for the significand: shift right and round the result
+    // to nearest, with ties going to the even significand.
+    const int drop = msbIndex - significandBits;
+    bits = (rep_t)a >> drop ^ implicitBit;
+    // The discarded bits, left-aligned so they can be compared to one half.
+    const rep_t lost = (rep_t)a << (typeWidth - drop);
+    if (lost > signBit)
+      bits++;
+    else if (lost == signBit)
+      bits += bits & 1;
+  } else {
+    // The value fits as is: align the leading one with the implicit bit.
+    bits = (rep_t)a << (significandBits - msbIndex) ^ implicitBit;
+  }
+
+  // Add the biased exponent above the significand field.
+  bits += (rep_t)(msbIndex + exponentBias) << significandBits;
+  return fromRep(bits);
+}
+
+#if defined(__ARM_EABI__)
+#if defined(COMPILER_RT_ARMHF_TARGET)
+AEABI_RTABI fp_t __aeabi_ui2f(unsigned int a) { return __floatunsisf(a); }
+#else
+COMPILER_RT_ALIAS(__floatunsisf, __aeabi_ui2f)
+#endif
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatunsitf.c b/wasmrt/llvm-builtins/builtins/floatunsitf.c
new file mode 100644
index 0000000..3f0a524
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatunsitf.c
@@ -0,0 +1,40 @@
+//===-- lib/floatunsitf.c - uint -> quad-precision conversion -----*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements unsigned integer to quad-precision conversion for the
+// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even
+// mode.
+//
+//===----------------------------------------------------------------------===//
+
+#define QUAD_PRECISION
+#include "fp_lib.h"
+
+#if defined(CRT_HAS_TF_MODE)
+COMPILER_RT_ABI fp_t __floatunsitf(su_int a) {
+  // Zero has no leading one bit, so it must not reach the clz call below.
+  if (a == 0)
+    return fromRep(0);
+
+  // Index of the highest set bit; this is the unbiased binary exponent.
+  const int bitWidth = sizeof a * CHAR_BIT;
+  const int msbIndex = (bitWidth - 1) - clzsi(a);
+
+  // Align the leading one with the implicit-bit position and then clear it,
+  // leaving only the fraction bits in the significand field.
+  // NOTE(review): assumes significandBits >= msbIndex for quad precision.
+  rep_t bits = (rep_t)a << (significandBits - msbIndex);
+  bits ^= implicitBit;
+
+  // Add the biased exponent above the significand field and reinterpret the
+  // resulting bit pattern as fp_t.
+  bits += (rep_t)(msbIndex + exponentBias) << significandBits;
+  return fromRep(bits);
+}
+
+#endif
diff --git a/wasmrt/llvm-builtins/builtins/floatuntidf.c b/wasmrt/llvm-builtins/builtins/floatuntidf.c
new file mode 100644
index 0000000..4dfca8e
--- /dev/null
+++ b/wasmrt/llvm-builtins/builtins/floatuntidf.c
@@ -0,0 +1,70 @@
+//===-- floatuntidf.c - Implement __floatuntidf ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __floatuntidf for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+#ifdef CRT_HAS_128BIT
+
+// Returns: convert a to a double, rounding toward even.
+ +// Assumption: double is a IEEE 64 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm + +COMPILER_RT_ABI double __floatuntidf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + si_int e = sd - 1; // exponent + if (sd > DBL_MANT_DIG) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit DBL_MANT_DIG-1 bits to the right of 1 + // Q = bit DBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG + 2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits + if (a & ((tu_int)1 << DBL_MANT_DIG)) { + a >>= 1; + ++e; + } + // a is now rounded to DBL_MANT_DIG bits + } else { + a <<= (DBL_MANT_DIG - sd); + // a is now rounded to DBL_MANT_DIG bits + } + double_bits fb; + fb.u.s.high = ((e + 1023) << 20) | // exponent + ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high + fb.u.s.low = (su_int)a; // mantissa-low + return fb.f; +} + +#endif // CRT_HAS_128BIT diff --git a/wasmrt/llvm-builtins/builtins/floatuntisf.c b/wasmrt/llvm-builtins/builtins/floatuntisf.c new file mode 100644 index 0000000..a53659c --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/floatuntisf.c @@ -0,0 +1,68 @@ +//===-- floatuntisf.c - Implement __floatuntisf ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +COMPILER_RT_ABI float __floatuntisf(tu_int a) { + if (a == 0) + return 0.0F; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + si_int e = sd - 1; // exponent + if (sd > FLT_MANT_DIG) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit FLT_MANT_DIG-1 bits to the right of 1 + // Q = bit FLT_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG + 2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits + if (a & ((tu_int)1 << FLT_MANT_DIG)) { + a >>= 1; + ++e; + } + // a is now rounded to FLT_MANT_DIG bits + } else { + a <<= (FLT_MANT_DIG - sd); + // a is now rounded to FLT_MANT_DIG bits + } + float_bits fb; + fb.u = ((e + 127) << 23) | // exponent + ((su_int)a & 0x007FFFFF); // mantissa + return fb.f; +} + +#endif // CRT_HAS_128BIT diff --git 
a/wasmrt/llvm-builtins/builtins/floatuntitf.c b/wasmrt/llvm-builtins/builtins/floatuntitf.c new file mode 100644 index 0000000..33a81b3 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/floatuntitf.c @@ -0,0 +1,75 @@ +//===-- lib/floatuntitf.c - uint128 -> quad-precision conversion --*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements tu_int to quad-precision conversion for the +// compiler-rt library in the IEEE-754 default round-to-nearest, ties-to-even +// mode. +// +//===----------------------------------------------------------------------===// + +#define QUAD_PRECISION +#include "fp_lib.h" +#include "int_lib.h" + +// Returns: convert a tu_int to a fp_t, rounding toward even. + +// Assumption: fp_t is a IEEE 128 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee eeee eeee mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm +// mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm +// mmmm mmmm mmmm + +#if defined(CRT_HAS_TF_MODE) +COMPILER_RT_ABI fp_t __floatuntitf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > TF_MANT_DIG) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit TF_MANT_DIG-1 bits to the right of 1 + // Q = bit TF_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) { + case TF_MANT_DIG + 1: + a <<= 1; + break; + case TF_MANT_DIG + 2: + break; + default: + a = (a >> (sd - 
(TF_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + TF_MANT_DIG + 2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to TF_MANT_DIG or TF_MANT_DIG+1 bits + if (a & ((tu_int)1 << TF_MANT_DIG)) { + a >>= 1; + ++e; + } + // a is now rounded to TF_MANT_DIG bits + } else { + a <<= (TF_MANT_DIG - sd); + // a is now rounded to TF_MANT_DIG bits + } + + long_double_bits fb; + fb.u.high.all = (du_int)(e + 16383) << 48 // exponent + | ((a >> 64) & 0x0000ffffffffffffLL); // significand + fb.u.low.all = (du_int)(a); + return fb.f; +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/floatuntixf.c b/wasmrt/llvm-builtins/builtins/floatuntixf.c new file mode 100644 index 0000000..efd8a27 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/floatuntixf.c @@ -0,0 +1,70 @@ +//===-- floatuntixf.c - Implement __floatuntixf ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntixf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +#ifdef CRT_HAS_128BIT + +// Returns: convert a to a long double, rounding toward even. 
+ +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 +// bits tu_int is a 128 bit integral type + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee +// eeee | 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm +// mmmm mmmm mmmm + +COMPILER_RT_ABI long double __floatuntixf(tu_int a) { + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > LDBL_MANT_DIG) { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit LDBL_MANT_DIG-1 bits to the right of 1 + // Q = bit LDBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (LDBL_MANT_DIG + 2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG + 2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits + if (a & ((tu_int)1 << LDBL_MANT_DIG)) { + a >>= 1; + ++e; + } + // a is now rounded to LDBL_MANT_DIG bits + } else { + a <<= (LDBL_MANT_DIG - sd); + // a is now rounded to LDBL_MANT_DIG bits + } + long_double_bits fb; + fb.u.high.s.low = (e + 16383); // exponent + fb.u.low.all = (du_int)a; // mantissa + return fb.f; +} + +#endif diff --git a/wasmrt/llvm-builtins/builtins/fp_add_impl.inc b/wasmrt/llvm-builtins/builtins/fp_add_impl.inc new file mode 100644 index 0000000..7133358 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_add_impl.inc @@ -0,0 +1,172 @@ +//===----- lib/fp_add_impl.inc - floaing point addition -----------*- C -*-===// +// +// Part of the LLVM 
Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float addition with the IEEE-754 default rounding +// (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" +#include "fp_mode.h" + +static __inline fp_t __addXf3__(fp_t a, fp_t b) { + rep_t aRep = toRep(a); + rep_t bRep = toRep(b); + const rep_t aAbs = aRep & absMask; + const rep_t bAbs = bRep & absMask; + + // Detect if a or b is zero, infinity, or NaN. + if (aAbs - REP_C(1) >= infRep - REP_C(1) || + bAbs - REP_C(1) >= infRep - REP_C(1)) { + // NaN + anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything + NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // +/-infinity + -/+infinity = qNaN + if ((toRep(a) ^ toRep(b)) == signBit) + return fromRep(qnanRep); + // +/-infinity + anything remaining = +/- infinity + else + return a; + } + + // anything remaining + +/-infinity = +/-infinity + if (bAbs == infRep) + return b; + + // zero + anything = anything + if (!aAbs) { + // We need to get the sign right for zero + zero. + if (!bAbs) + return fromRep(toRep(a) & toRep(b)); + else + return b; + } + + // anything + zero = anything + if (!bAbs) + return a; + } + + // Swap a and b if necessary so that a has the larger absolute value. + if (bAbs > aAbs) { + const rep_t temp = aRep; + aRep = bRep; + bRep = temp; + } + + // Extract the exponent and significand from the (possibly swapped) a and b. 
+ int aExponent = aRep >> significandBits & maxExponent; + int bExponent = bRep >> significandBits & maxExponent; + rep_t aSignificand = aRep & significandMask; + rep_t bSignificand = bRep & significandMask; + + // Normalize any denormals, and adjust the exponent accordingly. + if (aExponent == 0) + aExponent = normalize(&aSignificand); + if (bExponent == 0) + bExponent = normalize(&bSignificand); + + // The sign of the result is the sign of the larger operand, a. If they + // have opposite signs, we are performing a subtraction. Otherwise, we + // perform addition. + const rep_t resultSign = aRep & signBit; + const bool subtraction = (aRep ^ bRep) & signBit; + + // Shift the significands to give us round, guard and sticky, and set the + // implicit significand bit. If we fell through from the denormal path it + // was already set by normalize( ), but setting it twice won't hurt + // anything. + aSignificand = (aSignificand | implicitBit) << 3; + bSignificand = (bSignificand | implicitBit) << 3; + + // Shift the significand of b by the difference in exponents, with a sticky + // bottom bit to get rounding correct. + const unsigned int align = aExponent - bExponent; + if (align) { + if (align < typeWidth) { + const bool sticky = (bSignificand << (typeWidth - align)) != 0; + bSignificand = bSignificand >> align | sticky; + } else { + bSignificand = 1; // Set the sticky bit. b is known to be non-zero. + } + } + if (subtraction) { + aSignificand -= bSignificand; + // If a == -b, return +zero. + if (aSignificand == 0) + return fromRep(0); + + // If partial cancellation occured, we need to left-shift the result + // and adjust the exponent. + if (aSignificand < implicitBit << 3) { + const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); + aSignificand <<= shift; + aExponent -= shift; + } + } else /* addition */ { + aSignificand += bSignificand; + + // If the addition carried up, we need to right-shift the result and + // adjust the exponent. 
+ if (aSignificand & implicitBit << 4) { + const bool sticky = aSignificand & 1; + aSignificand = aSignificand >> 1 | sticky; + aExponent += 1; + } + } + + // If we have overflowed the type, return +/- infinity. + if (aExponent >= maxExponent) + return fromRep(infRep | resultSign); + + if (aExponent <= 0) { + // The result is denormal before rounding. The exponent is zero and we + // need to shift the significand. + const int shift = 1 - aExponent; + const bool sticky = (aSignificand << (typeWidth - shift)) != 0; + aSignificand = aSignificand >> shift | sticky; + aExponent = 0; + } + + // Low three bits are round, guard, and sticky. + const int roundGuardSticky = aSignificand & 0x7; + + // Shift the significand into place, and mask off the implicit bit. + rep_t result = aSignificand >> 3 & significandMask; + + // Insert the exponent and sign. + result |= (rep_t)aExponent << significandBits; + result |= resultSign; + + // Perform the final rounding. The result may overflow to infinity, but + // that is the correct result in that case. + switch (__fe_getround()) { + case CRT_FE_TONEAREST: + if (roundGuardSticky > 0x4) + result++; + if (roundGuardSticky == 0x4) + result += result & 1; + break; + case CRT_FE_DOWNWARD: + if (resultSign && roundGuardSticky) result++; + break; + case CRT_FE_UPWARD: + if (!resultSign && roundGuardSticky) result++; + break; + case CRT_FE_TOWARDZERO: + break; + } + if (roundGuardSticky) + __fe_raise_inexact(); + return fromRep(result); +} diff --git a/wasmrt/llvm-builtins/builtins/fp_compare_impl.inc b/wasmrt/llvm-builtins/builtins/fp_compare_impl.inc new file mode 100644 index 0000000..a9a4f6f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_compare_impl.inc @@ -0,0 +1,119 @@ +//===-- lib/fp_compare_impl.inc - Floating-point comparison -------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +// GCC uses long (at least for x86_64) as the return type of the comparison +// functions. We need to ensure that the return value is sign-extended in the +// same way as GCC expects (since otherwise GCC-generated __builtin_isinf +// returns true for finite 128-bit floating-point numbers). +#ifdef __aarch64__ +// AArch64 GCC overrides libgcc_cmp_return to use int instead of long. +typedef int CMP_RESULT; +#elif __SIZEOF_POINTER__ == 8 && __SIZEOF_LONG__ == 4 +// LLP64 ABIs use long long instead of long. +typedef long long CMP_RESULT; +#elif __AVR__ +// AVR uses a single byte for the return value. +typedef char CMP_RESULT; +#else +// Otherwise the comparison functions return long. +typedef long CMP_RESULT; +#endif + +#if !defined(__clang__) && defined(__GNUC__) +// GCC uses a special __libgcc_cmp_return__ mode to define the return type, so +// check that we are ABI-compatible when compiling the builtins with GCC. +typedef int GCC_CMP_RESULT __attribute__((__mode__(__libgcc_cmp_return__))); +_Static_assert(sizeof(GCC_CMP_RESULT) == sizeof(CMP_RESULT), + "SOFTFP ABI not compatible with GCC"); +#endif + +enum { + LE_LESS = -1, + LE_EQUAL = 0, + LE_GREATER = 1, + LE_UNORDERED = 1, +}; + +static inline CMP_RESULT __leXf2__(fp_t a, fp_t b) { + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + // If either a or b is NaN, they are unordered. + if (aAbs > infRep || bAbs > infRep) + return LE_UNORDERED; + + // If a and b are both zeros, they are equal. + if ((aAbs | bAbs) == 0) + return LE_EQUAL; + + // If at least one of a and b is positive, we get the same result comparing + // a and b as signed integers as we would with a floating-point compare. 
+ if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } else { + // Otherwise, both are negative, so we need to flip the sense of the + // comparison to get the correct result. (This assumes a twos- or ones- + // complement integer representation; if integers are represented in a + // sign-magnitude representation, then this flip is incorrect). + if (aInt > bInt) + return LE_LESS; + else if (aInt == bInt) + return LE_EQUAL; + else + return LE_GREATER; + } +} + +enum { + GE_LESS = -1, + GE_EQUAL = 0, + GE_GREATER = 1, + GE_UNORDERED = -1 // Note: different from LE_UNORDERED +}; + +static inline CMP_RESULT __geXf2__(fp_t a, fp_t b) { + const srep_t aInt = toRep(a); + const srep_t bInt = toRep(b); + const rep_t aAbs = aInt & absMask; + const rep_t bAbs = bInt & absMask; + + if (aAbs > infRep || bAbs > infRep) + return GE_UNORDERED; + if ((aAbs | bAbs) == 0) + return GE_EQUAL; + if ((aInt & bInt) >= 0) { + if (aInt < bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } else { + if (aInt > bInt) + return GE_LESS; + else if (aInt == bInt) + return GE_EQUAL; + else + return GE_GREATER; + } +} + +static inline CMP_RESULT __unordXf2__(fp_t a, fp_t b) { + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + return aAbs > infRep || bAbs > infRep; +} diff --git a/wasmrt/llvm-builtins/builtins/fp_div_impl.inc b/wasmrt/llvm-builtins/builtins/fp_div_impl.inc new file mode 100644 index 0000000..29bcd19 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_div_impl.inc @@ -0,0 +1,419 @@ +//===-- fp_div_impl.inc - Floating point division -----------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float division with the IEEE-754 default +// rounding (to nearest, ties to even). +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +// The __divXf3__ function implements Newton-Raphson floating point division. +// It uses 3 iterations for float32, 4 for float64 and 5 for float128, +// respectively. Due to number of significant bits being roughly doubled +// every iteration, the two modes are supported: N full-width iterations (as +// it is done for float32 by default) and (N-1) half-width iteration plus one +// final full-width iteration. It is expected that half-width integer +// operations (w.r.t rep_t size) can be performed faster for some hardware but +// they require error estimations to be computed separately due to larger +// computational errors caused by truncating intermediate results. + +// Half the bit-size of rep_t +#define HW (typeWidth / 2) +// rep_t-sized bitmask with lower half of bits set to ones +#define loMask (REP_C(-1) >> HW) + +#if NUMBER_OF_FULL_ITERATIONS < 1 +#error At least one full iteration is required +#endif + +static __inline fp_t __divXf3__(fp_t a, fp_t b) { + + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. 
+ if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN / anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything / NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity / infinity = NaN + if (bAbs == infRep) + return fromRep(qnanRep); + // infinity / anything else = +/- infinity + else + return fromRep(aAbs | quotientSign); + } + + // anything else / infinity = +/- 0 + if (bAbs == infRep) + return fromRep(quotientSign); + + if (!aAbs) { + // zero / zero = NaN + if (!bAbs) + return fromRep(qnanRep); + // zero / anything else = +/- zero + else + return fromRep(quotientSign); + } + // anything else / zero = +/- infinity + if (!bAbs) + return fromRep(infRep | quotientSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale -= normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. + aSignificand |= implicitBit; + bSignificand |= implicitBit; + + int writtenExponent = (aExponent - bExponent + scale) + exponentBias; + + const rep_t b_UQ1 = bSignificand << (typeWidth - significandBits - 1); + + // Align the significand of b as a UQ1.(n-1) fixed-point number in the range + // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax + // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. + // The max error for this approximation is achieved at endpoints, so + // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., + // which is about 4.5 bits. 
+ // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... + + // Then, refine the reciprocal estimate using a quadratically converging + // Newton-Raphson iteration: + // x_{n+1} = x_n * (2 - x_n * b) + // + // Let b be the original divisor considered "in infinite precision" and + // obtained from IEEE754 representation of function argument (with the + // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in + // UQ1.(W-1). + // + // Let b_hw be an infinitely precise number obtained from the highest (HW-1) + // bits of divisor significand (with the implicit bit set). Corresponds to + // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** + // version of b_UQ1. + // + // Let e_n := x_n - 1/b_hw + // E_n := x_n - 1/b + // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) + // = abs(e_n) + (b - b_hw) / (b*b_hw) + // <= abs(e_n) + 2 * 2^-HW + + // rep_t-sized iterations may be slower than the corresponding half-width + // variant depending on the handware and whether single/double/quad precision + // is selected. + // NB: Using half-width iterations increases computation errors due to + // rounding, so error estimations have to be computed taking the selected + // mode into account! +#if NUMBER_OF_HALF_ITERATIONS > 0 + // Starting with (n-1) half-width iterations + const half_rep_t b_UQ1_hw = bSignificand >> (significandBits + 1 - HW); + + // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW + // with W0 being either 16 or 32 and W0 <= HW. + // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which + // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. +#if defined(SINGLE_PRECISION) + // Use 16-bit initial estimation in case we are using half-width iterations + // for float32 division. This is expected to be useful for some 16-bit + // targets. Not used by default as it requires performing more work during + // rounding and would hardly help on regular 32- or 64-bit targets. 
+ const half_rep_t C_hw = HALF_REP_C(0x7504); +#else + // HW is at least 32. Shifting into the highest bits if needed. + const half_rep_t C_hw = HALF_REP_C(0x7504F333) << (HW - 32); +#endif + + // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, + // so x0 fits to UQ0.HW without wrapping. + half_rep_t x_UQ0_hw = C_hw - (b_UQ1_hw /* exact b_hw/2 as UQ0.HW */); + // An e_0 error is comprised of errors due to + // * x0 being an inherently imprecise first approximation of 1/b_hw + // * C_hw being some (irrational) number **truncated** to W0 bits + // Please note that e_0 is calculated against the infinitely precise + // reciprocal of b_hw (that is, **truncated** version of b). + // + // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 + + // By construction, 1 <= b < 2 + // f(x) = x * (2 - b*x) = 2*x - b*x^2 + // f'(x) = 2 * (1 - b*x) + // + // On the [0, 1] interval, f(0) = 0, + // then it increses until f(1/b) = 1 / b, maximum on (0, 1), + // then it decreses to f(1) = 2 - b + // + // Let g(x) = x - f(x) = b*x^2 - x. + // On (0, 1/b), g(x) < 0 <=> f(x) > x + // On (1/b, 1], g(x) > 0 <=> f(x) < x + // + // For half-width iterations, b_hw is used instead of b. + REPEAT_N_TIMES(NUMBER_OF_HALF_ITERATIONS, { + // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp + // of corr_UQ1_hw. + // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). + // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided + // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is + // expected to be strictly positive because b_UQ1_hw has its highest bit set + // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). + half_rep_t corr_UQ1_hw = 0 - ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW); + + // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally + // obtaining an UQ1.(HW-1) number and proving its highest bit could be + // considered to be 0 to be able to represent it in UQ0.HW. 
+ // From the above analysis of f(x), if corr_UQ1_hw would be represented + // without any intermediate loss of precision (that is, in twice_rep_t) + // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly + // less otherwise. On the other hand, to obtain [1.]000..., one have to pass + // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due + // to 1.0 being not representable as UQ0.HW). + // The fact corr_UQ1_hw was virtually round up (due to result of + // multiplication being **first** truncated, then negated - to improve + // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. + x_UQ0_hw = (rep_t)x_UQ0_hw * corr_UQ1_hw >> (HW - 1); + // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t + // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after + // any number of iterations, so just subtract 2 from the reciprocal + // approximation after last iteration. + + // In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: + // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 + // = 1 - e_n * b_hw + 2*eps1 + // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 + // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 + // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 + // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw + // \------ >0 -------/ \-- >0 ---/ + // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) + }) + // For initial half-width iterations, U = 2^-HW + // Let abs(e_n) <= u_n * U, + // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) + // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) + + // Account for possible overflow (see above). 
For an overflow to occur for the + // first time, for "ideal" corr_UQ1_hw (that is, without intermediate + // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum + // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to + // be not below that value (see g(x) above), so it is safe to decrement just + // once after the final iteration. On the other hand, an effective value of + // divisor changes after this point (from b_hw to b), so adjust here. + x_UQ0_hw -= 1U; + rep_t x_UQ0 = (rep_t)x_UQ0_hw << HW; + x_UQ0 -= 1U; + +#else + // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n + const rep_t C = REP_C(0x7504F333) << (typeWidth - 32); + rep_t x_UQ0 = C - b_UQ1; + // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 +#endif + + // Error estimations for full-precision iterations are calculated just + // as above, but with U := 2^-W and taking extra decrementing into account. + // We need at least one such iteration. + +#ifdef USE_NATIVE_FULL_ITERATIONS + REPEAT_N_TIMES(NUMBER_OF_FULL_ITERATIONS, { + rep_t corr_UQ1 = 0 - ((twice_rep_t)x_UQ0 * b_UQ1 >> typeWidth); + x_UQ0 = (twice_rep_t)x_UQ0 * corr_UQ1 >> (typeWidth - 1); + }) +#else +#if NUMBER_OF_FULL_ITERATIONS != 1 +#error Only a single emulated full iteration is supported +#endif +#if !(NUMBER_OF_HALF_ITERATIONS > 0) + // Cannot normally reach here: only one full-width iteration is requested and + // the total number of iterations should be at least 3 even for float32. +#error Check NUMBER_OF_HALF_ITERATIONS, NUMBER_OF_FULL_ITERATIONS and USE_NATIVE_FULL_ITERATIONS. +#endif + // Simulating operations on a twice_rep_t to perform a single final full-width + // iteration. Using ad-hoc multiplication implementations to take advantage + // of particular structure of operands. 
+ rep_t blo = b_UQ1 & loMask; + // x_UQ0 = x_UQ0_hw * 2^HW - 1 + // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 + // + // <--- higher half ---><--- lower half ---> + // [x_UQ0_hw * b_UQ1_hw] + // + [ x_UQ0_hw * blo ] + // - [ b_UQ1 ] + // = [ result ][.... discarded ...] + rep_t corr_UQ1 = 0U - ( (rep_t)x_UQ0_hw * b_UQ1_hw + + ((rep_t)x_UQ0_hw * blo >> HW) + - REP_C(1)); // account for *possible* carry + rep_t lo_corr = corr_UQ1 & loMask; + rep_t hi_corr = corr_UQ1 >> HW; + // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 + x_UQ0 = ((rep_t)x_UQ0_hw * hi_corr << 1) + + ((rep_t)x_UQ0_hw * lo_corr >> (HW - 1)) + - REP_C(2); // 1 to account for the highest bit of corr_UQ1 can be 1 + // 1 to account for possible carry + // Just like the case of half-width iterations but with possibility + // of overflowing by one extra Ulp of x_UQ0. + x_UQ0 -= 1U; + // ... and then traditional fixup by 2 should work + + // On error estimation: + // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW + // + (2^-HW + 2^-W)) + // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW + + // Then like for the half-width iterations: + // With 0 <= eps1, eps2 < 2^-W + // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b + // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] + // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] +#endif + + // Finally, account for possible overflow, as explained above. 
+ x_UQ0 -= 2U; + + // u_n for different precisions (with N-1 half-width iterations): + // W0 is the precision of C + // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW + + // Estimated with bc: + // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } + // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } + // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } + // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } + + // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) + // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 + // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 + // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 + // u_3 | < 7.31 | | < 7.31 | < 27054456580 + // u_4 | | | | < 80.4 + // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 + + // Add 2 to U_N due to final decrement. + +#if defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 2 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(74) +#elif defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 0 && NUMBER_OF_FULL_ITERATIONS == 3 +#define RECIPROCAL_PRECISION REP_C(10) +#elif defined(DOUBLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 3 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(220) +#elif defined(QUAD_PRECISION) && NUMBER_OF_HALF_ITERATIONS == 4 && NUMBER_OF_FULL_ITERATIONS == 1 +#define RECIPROCAL_PRECISION REP_C(13922) +#else +#error Invalid number of iterations +#endif + + // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W + x_UQ0 -= RECIPROCAL_PRECISION; + // Now 1/b - (2*P) * 2^-W < x < 1/b + // FIXME Is x_UQ0 still >= 0.5? + + rep_t quotient_UQ1, dummy; + wideMultiply(x_UQ0, aSignificand << 1, &quotient_UQ1, &dummy); + // Now, a/b - 4*P * 2^-W < q < a/b for q=<quotient_UQ1:dummy> in UQ1.(SB+1+W). + + // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), + // adjust it to be in [1.0, 2.0) as UQ1.SB. 
+ rep_t residualLo; + if (quotient_UQ1 < (implicitBit << 1)) { + // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, + // effectively doubling its value as well as its error estimation. + residualLo = (aSignificand << (significandBits + 1)) - quotient_UQ1 * bSignificand; + writtenExponent -= 1; + aSignificand <<= 1; + } else { + // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it + // to UQ1.SB by right shifting by 1. Least significant bit is omitted. + quotient_UQ1 >>= 1; + residualLo = (aSignificand << significandBits) - quotient_UQ1 * bSignificand; + } + // NB: residualLo is calculated above for the normal result case. + // It is re-computed on denormal path that is expected to be not so + // performance-sensitive. + + // Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB + // Each NextAfter() increments the floating point value by at least 2^-SB + // (more, if exponent was incremented). + // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): + // q + // | | * | | | | | + // <---> 2^t + // | | | | | * | | + // q + // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. 
+ // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB + // (8*P) * 2^-W < 0.5 * 2^-SB + // P < 2^(W-4-SB) + // Generally, for at most R NextAfter() to be enough, + // P < (2*R - 1) * 2^(W-4-SB) + // For f32 (0+3): 10 < 32 (OK) + // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required + // For f64: 220 < 256 (OK) + // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) + + // If we have overflowed the exponent, return infinity + if (writtenExponent >= maxExponent) + return fromRep(infRep | quotientSign); + + // Now, quotient_UQ1_SB <= the correctly-rounded result + // and may need taking NextAfter() up to 3 times (see error estimates above) + // r = a - b * q + rep_t absResult; + if (writtenExponent > 0) { + // Clear the implicit bit + absResult = quotient_UQ1 & significandMask; + // Insert the exponent + absResult |= (rep_t)writtenExponent << significandBits; + residualLo <<= 1; + } else { + // Prevent shift amount from being negative + if (significandBits + writtenExponent < 0) + return fromRep(quotientSign); + + absResult = quotient_UQ1 >> (-writtenExponent + 1); + + // multiplied by two to prevent shift amount to be negative + residualLo = (aSignificand << (significandBits + writtenExponent)) - (absResult * bSignificand << 1); + } + + // Round + residualLo += absResult & 1; // tie to even + // The above line conditionally turns the below LT comparison into LTE + absResult += residualLo > bSignificand; +#if defined(QUAD_PRECISION) || (defined(SINGLE_PRECISION) && NUMBER_OF_HALF_ITERATIONS > 0) + // Do not round Infinity to NaN + absResult += absResult < infRep && residualLo > (2 + 1) * bSignificand; +#endif +#if defined(QUAD_PRECISION) + absResult += absResult < infRep && residualLo > (4 + 1) * bSignificand; +#endif + return fromRep(absResult | quotientSign); +} diff --git a/wasmrt/llvm-builtins/builtins/fp_extend.h b/wasmrt/llvm-builtins/builtins/fp_extend.h new file mode 100644 index 0000000..eee4722 --- /dev/null +++ 
b/wasmrt/llvm-builtins/builtins/fp_extend.h @@ -0,0 +1,99 @@ +//===-lib/fp_extend.h - low precision -> high precision conversion -*- C +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_EXTEND_HEADER +#define FP_EXTEND_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; +#define src_rep_t_clz clzsi + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; +static __inline int src_rep_t_clz(src_rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return clzsi(a >> 32); + else + return 32 + clzsi(a & REP_C(0xffffffff)); +#endif +} + +#elif defined SRC_HALF +#ifdef COMPILER_RT_HAS_FLOAT16 +typedef _Float16 src_t; +#else +typedef uint16_t src_t; +#endif +typedef uint16_t src_rep_t; +#define SRC_REP_C UINT16_C +static const int srcSigBits = 10; +#define src_rep_t_clz __builtin_clz + +#else +#error Source should be half, single, or double precision! 
+#endif // end source precision + +#if defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_QUAD +typedef long double dst_t; +typedef __uint128_t dst_rep_t; +#define DST_REP_C (__uint128_t) +static const int dstSigBits = 112; + +#else +#error Destination should be single, double, or quad precision! +#endif // end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. + +static __inline src_rep_t srcToRep(src_t x) { + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} +// End helper routines. Conversion implementation follows. + +#endif // FP_EXTEND_HEADER diff --git a/wasmrt/llvm-builtins/builtins/fp_extend_impl.inc b/wasmrt/llvm-builtins/builtins/fp_extend_impl.inc new file mode 100644 index 0000000..d1c9c02 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_extend_impl.inc @@ -0,0 +1,107 @@ +//=-lib/fp_extend_impl.inc - low precision -> high precision conversion -*-- -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a narrower to a wider +// IEEE-754 floating-point type. The constants and types defined following the +// includes below parameterize the conversion. 
+// +// It does not support types that don't use the usual IEEE-754 interchange +// formats; specifically, some work would be needed to adapt it to +// (for example) the Intel 80-bit format or PowerPC double-double format. +// +// Note please, however, that this implementation is only intended to support +// *widening* operations; if you need to convert to a *narrower* floating-point +// type (e.g. double -> float), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. You also may +// run into trouble finding an appropriate CLZ function for wide source types; +// you will likely need to roll your own on some platforms. +// +// Finally, the following assumptions are made: +// +// 1. Floating-point types and integer types have the same endianness on the +// target platform. +// +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. +// +//===----------------------------------------------------------------------===// + +#include "fp_extend.h" + +static __inline dst_t __extendXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. 
+ const int srcBits = sizeof(src_t) * CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits; + + // Break a into a sign and representation of the absolute value. + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + // If sizeof(src_rep_t) < sizeof(int), the subtraction result is promoted + // to (signed) int. To avoid that, explicitly cast to src_rep_t. + if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) { + // a is a normal number. + // Extend to the destination type by shifting the significand and + // exponent into the proper position and rebiasing the exponent. + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits); + absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits; + } + + else if (aAbs >= srcInfinity) { + // a is NaN or infinity. + // Conjure the result by beginning with infinity, then setting the qNaN + // bit (if needed) and right-aligning the rest of the trailing NaN + // payload field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits); + absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits); + } + + else if (aAbs) { + // a is denormal. 
+ // renormalize the significand and clear the leading bit, then insert + // the correct adjusted exponent in the destination type. + const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal); + absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale); + absResult ^= dstMinNormal; + const int resultExponent = dstExpBias - srcExpBias - scale + 1; + absResult |= (dst_rep_t)resultExponent << dstSigBits; + } + + else { + // a is zero. + absResult = 0; + } + + // Apply the signbit to the absolute value. + const dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits); + return dstFromRep(result); +} diff --git a/wasmrt/llvm-builtins/builtins/fp_fixint_impl.inc b/wasmrt/llvm-builtins/builtins/fp_fixint_impl.inc new file mode 100644 index 0000000..2196d71 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_fixint_impl.inc @@ -0,0 +1,40 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements float to integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixint_t __fixint(fp_t a) { + const fixint_t fixint_max = (fixint_t)((~(fixuint_t)0) / 2); + const fixint_t fixint_min = -fixint_max - 1; + // Break a into sign, exponent, significand parts. + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const fixint_t sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If exponent is negative, the result is zero. 
+ if (exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. + if ((unsigned)exponent >= sizeof(fixint_t) * CHAR_BIT) + return sign == 1 ? fixint_max : fixint_min; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return sign * (significand >> (significandBits - exponent)); + else + return sign * ((fixint_t)significand << (exponent - significandBits)); +} diff --git a/wasmrt/llvm-builtins/builtins/fp_fixuint_impl.inc b/wasmrt/llvm-builtins/builtins/fp_fixuint_impl.inc new file mode 100644 index 0000000..cb2bf54 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_fixuint_impl.inc @@ -0,0 +1,38 @@ +//===-- lib/fixdfsi.c - Double-precision -> integer conversion ----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements float to unsigned integer conversion for the +// compiler-rt library. +// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fixuint_t __fixuint(fp_t a) { + // Break a into sign, exponent, significand parts. + const rep_t aRep = toRep(a); + const rep_t aAbs = aRep & absMask; + const int sign = aRep & signBit ? -1 : 1; + const int exponent = (aAbs >> significandBits) - exponentBias; + const rep_t significand = (aAbs & significandMask) | implicitBit; + + // If either the value or the exponent is negative, the result is zero. + if (sign == -1 || exponent < 0) + return 0; + + // If the value is too large for the integer type, saturate. 
+ if ((unsigned)exponent >= sizeof(fixuint_t) * CHAR_BIT) + return ~(fixuint_t)0; + + // If 0 <= exponent < significandBits, right shift to get the result. + // Otherwise, shift left. + if (exponent < significandBits) + return significand >> (significandBits - exponent); + else + return (fixuint_t)significand << (exponent - significandBits); +} diff --git a/wasmrt/llvm-builtins/builtins/fp_lib.h b/wasmrt/llvm-builtins/builtins/fp_lib.h new file mode 100644 index 0000000..58eb45f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_lib.h @@ -0,0 +1,424 @@ +//===-- lib/fp_lib.h - Floating-point utilities -------------------*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a configuration header for soft-float routines in compiler-rt. +// This file does not provide any part of the compiler-rt interface, but defines +// many useful constants and utility routines that are used in the +// implementation of the soft-float routines in compiler-rt. +// +// Assumes that float, double and long double correspond to the IEEE-754 +// binary32, binary64 and binary 128 types, respectively, and that integer +// endianness matches floating point endianness on the target platform. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_LIB_HEADER +#define FP_LIB_HEADER + +#include "int_lib.h" +#include "int_math.h" +#include +#include +#include + +// x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in +// 32-bit mode. 
+#if defined(__FreeBSD__) && defined(__i386__) +#include +#if __FreeBSD_version < 903000 // v9.3 +#define uint64_t unsigned long long +#define int64_t long long +#undef UINT64_C +#define UINT64_C(c) (c##ULL) +#endif +#endif + +#if defined SINGLE_PRECISION + +typedef uint16_t half_rep_t; +typedef uint32_t rep_t; +typedef uint64_t twice_rep_t; +typedef int32_t srep_t; +typedef float fp_t; +#define HALF_REP_C UINT16_C +#define REP_C UINT32_C +#define significandBits 23 + +static __inline int rep_clz(rep_t a) { return clzsi(a); } + +// 32x32 --> 64 bit multiply +static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + const uint64_t product = (uint64_t)a * b; + *hi = product >> 32; + *lo = product; +} +COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b); + +#elif defined DOUBLE_PRECISION + +typedef uint32_t half_rep_t; +typedef uint64_t rep_t; +typedef int64_t srep_t; +typedef double fp_t; +#define HALF_REP_C UINT32_C +#define REP_C UINT64_C +#define significandBits 52 + +static __inline int rep_clz(rep_t a) { +#if defined __LP64__ + return __builtin_clzl(a); +#else + if (a & REP_C(0xffffffff00000000)) + return clzsi(a >> 32); + else + return 32 + clzsi(a & REP_C(0xffffffff)); +#endif +} + +#define loWord(a) (a & 0xffffffffU) +#define hiWord(a) (a >> 32) + +// 64x64 -> 128 wide multiply for platforms that don't have such an operation; +// many 64-bit platforms have this operation, but they tend to have hardware +// floating-point, so we don't bother with a special case for them here. 
+static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) { + // Each of the component 32x32 -> 64 products + const uint64_t plolo = loWord(a) * loWord(b); + const uint64_t plohi = loWord(a) * hiWord(b); + const uint64_t philo = hiWord(a) * loWord(b); + const uint64_t phihi = hiWord(a) * hiWord(b); + // Sum terms that contribute to lo in a way that allows us to get the carry + const uint64_t r0 = loWord(plolo); + const uint64_t r1 = hiWord(plolo) + loWord(plohi) + loWord(philo); + *lo = r0 + (r1 << 32); + // Sum terms contributing to hi with the carry from lo + *hi = hiWord(plohi) + hiWord(philo) + hiWord(r1) + phihi; +} +#undef loWord +#undef hiWord + +COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b); + +#elif defined QUAD_PRECISION +#if __LDBL_MANT_DIG__ == 113 && defined(__SIZEOF_INT128__) +// TODO: Availability of the *tf functions should not depend on long double +// being IEEE 128, but instead on being able to use a 128-bit floating-point +// type, which includes __float128. +// Right now this (incorrectly) stops the builtins from being used for x86. 
#define CRT_LDBL_128BIT
#define CRT_HAS_TF_MODE
#define TF_C(c) c##L
typedef uint64_t half_rep_t;
typedef __uint128_t rep_t;
typedef __int128_t srep_t;
typedef long double fp_t;
#define HALF_REP_C UINT64_C
#define REP_C (__uint128_t)
// Note: Since there is no explicit way to tell compiler the constant is a
// 128-bit integer, we let the constant be casted to 128-bit integer
#define significandBits 112
#define TF_MANT_DIG (significandBits + 1)

// Count leading zeros of a 128-bit representation: use the upper 64 bits when
// any of them is set, otherwise the lower 64 bits plus an offset of 64.
static __inline int rep_clz(rep_t a) {
  const uint64_t upperHalf = (uint64_t)(a >> 64);
  if (upperHalf)
    return __builtin_clzll(upperHalf);
  return __builtin_clzll((uint64_t)a) + 64;
}

// Masks and accessors that split a 128-bit value into four 32-bit words;
// Word_1 is the most significant word, Word_4 the least significant.
#define Word_LoMask UINT64_C(0x00000000ffffffff)
#define Word_HiMask UINT64_C(0xffffffff00000000)
#define Word_FullMask UINT64_C(0xffffffffffffffff)
#define Word_1(a) (uint64_t)((a >> 96) & Word_LoMask)
#define Word_2(a) (uint64_t)((a >> 64) & Word_LoMask)
#define Word_3(a) (uint64_t)((a >> 32) & Word_LoMask)
#define Word_4(a) (uint64_t)(a & Word_LoMask)

// 128x128 -> 256 wide multiply for platforms that don't have such an operation;
// many 64-bit platforms have this operation, but they tend to have hardware
// floating-point, so we don't bother with a special case for them here.
// Multiplies two 128-bit values and writes the 256-bit product as two 128-bit
// halves: *hi receives the upper half, *lo the lower half.
//
// Each operand is split into four 32-bit words with Word_1..Word_4 (Word_1 is
// taken from bits 96..127, Word_4 from bits 0..31). productIJ below is word I
// of a times word J of b, held in 64 bits.
static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {

  const uint64_t product11 = Word_1(a) * Word_1(b);
  const uint64_t product12 = Word_1(a) * Word_2(b);
  const uint64_t product13 = Word_1(a) * Word_3(b);
  const uint64_t product14 = Word_1(a) * Word_4(b);
  const uint64_t product21 = Word_2(a) * Word_1(b);
  const uint64_t product22 = Word_2(a) * Word_2(b);
  const uint64_t product23 = Word_2(a) * Word_3(b);
  const uint64_t product24 = Word_2(a) * Word_4(b);
  const uint64_t product31 = Word_3(a) * Word_1(b);
  const uint64_t product32 = Word_3(a) * Word_2(b);
  const uint64_t product33 = Word_3(a) * Word_3(b);
  const uint64_t product34 = Word_3(a) * Word_4(b);
  const uint64_t product41 = Word_4(a) * Word_1(b);
  const uint64_t product42 = Word_4(a) * Word_2(b);
  const uint64_t product43 = Word_4(a) * Word_3(b);
  const uint64_t product44 = Word_4(a) * Word_4(b);

  // Group the partial products by weight: sumK adds every productIJ whose
  // words sit (4 - I) + (4 - J) == K positions above the lowest word, i.e.
  // whose contribution is scaled by 2^(32 * K). The sums are widened to
  // 128 bits before adding so they cannot wrap.
  const __uint128_t sum0 = (__uint128_t)product44;
  const __uint128_t sum1 = (__uint128_t)product34 + (__uint128_t)product43;
  const __uint128_t sum2 =
      (__uint128_t)product24 + (__uint128_t)product33 + (__uint128_t)product42;
  const __uint128_t sum3 = (__uint128_t)product14 + (__uint128_t)product23 +
                           (__uint128_t)product32 + (__uint128_t)product41;
  const __uint128_t sum4 =
      (__uint128_t)product13 + (__uint128_t)product22 + (__uint128_t)product31;
  const __uint128_t sum5 = (__uint128_t)product12 + (__uint128_t)product21;
  const __uint128_t sum6 = (__uint128_t)product11;

  // r0 gathers the pieces that land in bits 0..63 of the product and r1 the
  // pieces that land in bits 64..127; both are 128-bit so their upper bits
  // retain the carries out of those ranges.
  const __uint128_t r0 = (sum0 & Word_FullMask) + ((sum1 & Word_LoMask) << 32);
  const __uint128_t r1 = (sum0 >> 64) + ((sum1 >> 32) & Word_FullMask) +
                         (sum2 & Word_FullMask) + ((sum3 << 32) & Word_HiMask);

  // Low half: r0 plus r1 moved up by 64 bits.
  *lo = r0 + (r1 << 64);
  // High half: what overflowed out of r1 plus the parts of each sum that lie
  // at or above bit 128 of the product.
  *hi = (r1 >> 64) + (sum1 >> 96) + (sum2 >> 64) + (sum3 >> 32) + sum4 +
        (sum5 << 32) + (sum6 << 64);
}
// The word-splitting helpers are only needed by wideMultiply above.
#undef Word_1
#undef Word_2
#undef Word_3
#undef Word_4
#undef Word_HiMask
#undef Word_LoMask
#undef Word_FullMask
#endif // __LDBL_MANT_DIG__
== 113 && __SIZEOF_INT128__ +#else +#error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined. +#endif + +#if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) || \ + defined(CRT_LDBL_128BIT) +#define typeWidth (sizeof(rep_t) * CHAR_BIT) +#define exponentBits (typeWidth - significandBits - 1) +#define maxExponent ((1 << exponentBits) - 1) +#define exponentBias (maxExponent >> 1) + +#define implicitBit (REP_C(1) << significandBits) +#define significandMask (implicitBit - 1U) +#define signBit (REP_C(1) << (significandBits + exponentBits)) +#define absMask (signBit - 1U) +#define exponentMask (absMask ^ significandMask) +#define oneRep ((rep_t)exponentBias << significandBits) +#define infRep exponentMask +#define quietBit (implicitBit >> 1) +#define qnanRep (exponentMask | quietBit) + +static __inline rep_t toRep(fp_t x) { + const union { + fp_t f; + rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static __inline fp_t fromRep(rep_t x) { + const union { + fp_t f; + rep_t i; + } rep = {.i = x}; + return rep.f; +} + +static __inline int normalize(rep_t *significand) { + const int shift = rep_clz(*significand) - rep_clz(implicitBit); + *significand <<= shift; + return 1 - shift; +} + +static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) { + *hi = *hi << count | *lo >> (typeWidth - count); + *lo = *lo << count; +} + +static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, + unsigned int count) { + if (count < typeWidth) { + const bool sticky = (*lo << (typeWidth - count)) != 0; + *lo = *hi << (typeWidth - count) | *lo >> count | sticky; + *hi = *hi >> count; + } else if (count < 2 * typeWidth) { + const bool sticky = *hi << (2 * typeWidth - count) | *lo; + *lo = *hi >> (count - typeWidth) | sticky; + *hi = 0; + } else { + const bool sticky = *hi | *lo; + *lo = sticky; + *hi = 0; + } +} + +// Implements logb methods (logb, logbf, logbl) for IEEE-754. 
This avoids +// pulling in a libm dependency from compiler-rt, but is not meant to replace +// it (i.e. code calling logb() should get the one from libm, not this), hence +// the __compiler_rt prefix. +static __inline fp_t __compiler_rt_logbX(fp_t x) { + rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + // Abnormal cases: + // 1) +/- inf returns +inf; NaN returns NaN + // 2) 0.0 returns -inf + if (exp == maxExponent) { + if (((rep & signBit) == 0) || (x != x)) { + return x; // NaN or +inf: return x + } else { + return -x; // -inf: return -x + } + } else if (x == 0.0) { + // 0.0: return -inf + return fromRep(infRep | signBit); + } + + if (exp != 0) { + // Normal number + return exp - exponentBias; // Unbias exponent + } else { + // Subnormal number; normalize and repeat + rep &= absMask; + const int shift = 1 - normalize(&rep); + exp = (rep & exponentMask) >> significandBits; + return exp - exponentBias - shift; // Unbias exponent + } +} + +// Avoid using scalbn from libm. Unlike libc/libm scalbn, this function never +// sets errno on underflow/overflow. +static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) { + const rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + if (x == 0.0 || exp == maxExponent) + return x; // +/- 0.0, NaN, or inf: return x + + // Normalize subnormal input. + rep_t sig = rep & significandMask; + if (exp == 0) { + exp += normalize(&sig); + sig &= ~implicitBit; // clear the implicit bit again + } + + if (__builtin_sadd_overflow(exp, y, &exp)) { + // Saturate the exponent, which will guarantee an underflow/overflow below. + exp = (y >= 0) ? INT_MAX : INT_MIN; + } + + // Return this value: [+/-] 1.sig * 2 ** (exp - exponentBias). + const rep_t sign = rep & signBit; + if (exp >= maxExponent) { + // Overflow, which could produce infinity or the largest-magnitude value, + // depending on the rounding mode. 
+ return fromRep(sign | ((rep_t)(maxExponent - 1) << significandBits)) * 2.0f; + } else if (exp <= 0) { + // Subnormal or underflow. Use floating-point multiply to handle truncation + // correctly. + fp_t tmp = fromRep(sign | (REP_C(1) << significandBits) | sig); + exp += exponentBias - 1; + if (exp < 1) + exp = 1; + tmp *= fromRep((rep_t)exp << significandBits); + return tmp; + } else + return fromRep(sign | ((rep_t)exp << significandBits) | sig); +} + +// Avoid using fmax from libm. +static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) { + // If either argument is NaN, return the other argument. If both are NaN, + // arbitrarily return the second one. Otherwise, if both arguments are +/-0, + // arbitrarily return the first one. + return (crt_isnan(x) || x < y) ? y : x; +} + +#endif + +#if defined(SINGLE_PRECISION) + +static __inline fp_t __compiler_rt_logbf(fp_t x) { + return __compiler_rt_logbX(x); +} +static __inline fp_t __compiler_rt_scalbnf(fp_t x, int y) { + return __compiler_rt_scalbnX(x, y); +} +static __inline fp_t __compiler_rt_fmaxf(fp_t x, fp_t y) { +#if defined(__aarch64__) + // Use __builtin_fmaxf which turns into an fmaxnm instruction on AArch64. + return __builtin_fmaxf(x, y); +#else + // __builtin_fmaxf frequently turns into a libm call, so inline the function. + return __compiler_rt_fmaxX(x, y); +#endif +} + +#elif defined(DOUBLE_PRECISION) + +static __inline fp_t __compiler_rt_logb(fp_t x) { + return __compiler_rt_logbX(x); +} +static __inline fp_t __compiler_rt_scalbn(fp_t x, int y) { + return __compiler_rt_scalbnX(x, y); +} +static __inline fp_t __compiler_rt_fmax(fp_t x, fp_t y) { +#if defined(__aarch64__) + // Use __builtin_fmax which turns into an fmaxnm instruction on AArch64. + return __builtin_fmax(x, y); +#else + // __builtin_fmax frequently turns into a libm call, so inline the function. 
+ return __compiler_rt_fmaxX(x, y); +#endif +} + +#elif defined(QUAD_PRECISION) + +#if defined(CRT_LDBL_128BIT) +static __inline fp_t __compiler_rt_logbl(fp_t x) { + return __compiler_rt_logbX(x); +} +static __inline fp_t __compiler_rt_scalbnl(fp_t x, int y) { + return __compiler_rt_scalbnX(x, y); +} +static __inline fp_t __compiler_rt_fmaxl(fp_t x, fp_t y) { + return __compiler_rt_fmaxX(x, y); +} +#else +// The generic implementation only works for ieee754 floating point. For other +// floating point types, continue to rely on the libm implementation for now. +static __inline long double __compiler_rt_logbl(long double x) { + return crt_logbl(x); +} +static __inline long double __compiler_rt_scalbnl(long double x, int y) { + return crt_scalbnl(x, y); +} +static __inline long double __compiler_rt_fmaxl(long double x, long double y) { + return crt_fmaxl(x, y); +} +#endif // CRT_LDBL_128BIT + +#endif // *_PRECISION + +#endif // FP_LIB_HEADER diff --git a/wasmrt/llvm-builtins/builtins/fp_mode.c b/wasmrt/llvm-builtins/builtins/fp_mode.c new file mode 100644 index 0000000..5186547 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_mode.c @@ -0,0 +1,22 @@ +//===----- lib/fp_mode.c - Floaing-point environment mode utilities --C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides a default implementation of fp_mode.h for architectures +// that does not support or does not have an implementation of floating point +// environment mode. +// +//===----------------------------------------------------------------------===// + +#include "fp_mode.h" + +// IEEE-754 default rounding (to nearest, ties to even). 
+CRT_FE_ROUND_MODE __fe_getround(void) { return CRT_FE_TONEAREST; } + +int __fe_raise_inexact(void) { + return 0; +} diff --git a/wasmrt/llvm-builtins/builtins/fp_mode.h b/wasmrt/llvm-builtins/builtins/fp_mode.h new file mode 100644 index 0000000..5b4969a --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_mode.h @@ -0,0 +1,29 @@ +//===----- lib/fp_mode.h - Floaing-point environment mode utilities --C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is not part of the interface of this library. +// +// This file defines an interface for accessing hardware floating point +// environment mode. +// +//===----------------------------------------------------------------------===// + +#ifndef FP_MODE_H +#define FP_MODE_H + +typedef enum { + CRT_FE_TONEAREST, + CRT_FE_DOWNWARD, + CRT_FE_UPWARD, + CRT_FE_TOWARDZERO +} CRT_FE_ROUND_MODE; + +CRT_FE_ROUND_MODE __fe_getround(void); +int __fe_raise_inexact(void); + +#endif // FP_MODE_H diff --git a/wasmrt/llvm-builtins/builtins/fp_mul_impl.inc b/wasmrt/llvm-builtins/builtins/fp_mul_impl.inc new file mode 100644 index 0000000..a93f2d7 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_mul_impl.inc @@ -0,0 +1,128 @@ +//===---- lib/fp_mul_impl.inc - floating point multiplication -----*- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements soft-float multiplication with the IEEE-754 default +// rounding (to nearest, ties to even). 
+// +//===----------------------------------------------------------------------===// + +#include "fp_lib.h" + +static __inline fp_t __mulXf3__(fp_t a, fp_t b) { + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; + const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; + const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit; + + rep_t aSignificand = toRep(a) & significandMask; + rep_t bSignificand = toRep(b) & significandMask; + int scale = 0; + + // Detect if a or b is zero, denormal, infinity, or NaN. + if (aExponent - 1U >= maxExponent - 1U || + bExponent - 1U >= maxExponent - 1U) { + + const rep_t aAbs = toRep(a) & absMask; + const rep_t bAbs = toRep(b) & absMask; + + // NaN * anything = qNaN + if (aAbs > infRep) + return fromRep(toRep(a) | quietBit); + // anything * NaN = qNaN + if (bAbs > infRep) + return fromRep(toRep(b) | quietBit); + + if (aAbs == infRep) { + // infinity * non-zero = +/- infinity + if (bAbs) + return fromRep(aAbs | productSign); + // infinity * zero = NaN + else + return fromRep(qnanRep); + } + + if (bAbs == infRep) { + // non-zero * infinity = +/- infinity + if (aAbs) + return fromRep(bAbs | productSign); + // zero * infinity = NaN + else + return fromRep(qnanRep); + } + + // zero * anything = +/- zero + if (!aAbs) + return fromRep(productSign); + // anything * zero = +/- zero + if (!bAbs) + return fromRep(productSign); + + // One or both of a or b is denormal. The other (if applicable) is a + // normal number. Renormalize one or both of a and b, and set scale to + // include the necessary exponent adjustment. + if (aAbs < implicitBit) + scale += normalize(&aSignificand); + if (bAbs < implicitBit) + scale += normalize(&bSignificand); + } + + // Set the implicit significand bit. If we fell through from the + // denormal path it was already set by normalize( ), but setting it twice + // won't hurt anything. 
+ aSignificand |= implicitBit; + bSignificand |= implicitBit; + + // Perform a basic multiplication on the significands. One of them must be + // shifted beforehand to be aligned with the exponent. + rep_t productHi, productLo; + wideMultiply(aSignificand, bSignificand << exponentBits, &productHi, + &productLo); + + int productExponent = aExponent + bExponent - exponentBias + scale; + + // Normalize the significand and adjust the exponent if needed. + if (productHi & implicitBit) + productExponent++; + else + wideLeftShift(&productHi, &productLo, 1); + + // If we have overflowed the type, return +/- infinity. + if (productExponent >= maxExponent) + return fromRep(infRep | productSign); + + if (productExponent <= 0) { + // The result is denormal before rounding. + // + // If the result is so small that it just underflows to zero, return + // zero with the appropriate sign. Mathematically, there is no need to + // handle this case separately, but we make it a special case to + // simplify the shift logic. + const unsigned int shift = REP_C(1) - (unsigned int)productExponent; + if (shift >= typeWidth) + return fromRep(productSign); + + // Otherwise, shift the significand of the result so that the round + // bit is the high bit of productLo. + wideRightShiftWithSticky(&productHi, &productLo, shift); + } else { + // The result is normal before rounding. Insert the exponent. + productHi &= significandMask; + productHi |= (rep_t)productExponent << significandBits; + } + + // Insert the sign of the result. + productHi |= productSign; + + // Perform the final rounding. The final result may overflow to infinity, + // or underflow to zero, but those are the correct results in those cases. + // We use the default IEEE-754 round-to-nearest, ties-to-even rounding mode. 
+ if (productLo > signBit) + productHi++; + if (productLo == signBit) + productHi += productHi & 1; + return fromRep(productHi); +} diff --git a/wasmrt/llvm-builtins/builtins/fp_trunc.h b/wasmrt/llvm-builtins/builtins/fp_trunc.h new file mode 100644 index 0000000..91f6145 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_trunc.h @@ -0,0 +1,91 @@ +//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Set source and destination precision setting +// +//===----------------------------------------------------------------------===// + +#ifndef FP_TRUNC_HEADER +#define FP_TRUNC_HEADER + +#include "int_lib.h" + +#if defined SRC_SINGLE +typedef float src_t; +typedef uint32_t src_rep_t; +#define SRC_REP_C UINT32_C +static const int srcSigBits = 23; + +#elif defined SRC_DOUBLE +typedef double src_t; +typedef uint64_t src_rep_t; +#define SRC_REP_C UINT64_C +static const int srcSigBits = 52; + +#elif defined SRC_QUAD +typedef long double src_t; +typedef __uint128_t src_rep_t; +#define SRC_REP_C (__uint128_t) +static const int srcSigBits = 112; + +#else +#error Source should be double precision or quad precision! 
+#endif // end source precision + +#if defined DST_DOUBLE +typedef double dst_t; +typedef uint64_t dst_rep_t; +#define DST_REP_C UINT64_C +static const int dstSigBits = 52; + +#elif defined DST_SINGLE +typedef float dst_t; +typedef uint32_t dst_rep_t; +#define DST_REP_C UINT32_C +static const int dstSigBits = 23; + +#elif defined DST_HALF +#ifdef COMPILER_RT_HAS_FLOAT16 +typedef _Float16 dst_t; +#else +typedef uint16_t dst_t; +#endif +typedef uint16_t dst_rep_t; +#define DST_REP_C UINT16_C +static const int dstSigBits = 10; + +#elif defined DST_BFLOAT +typedef __bf16 dst_t; +typedef uint16_t dst_rep_t; +#define DST_REP_C UINT16_C +static const int dstSigBits = 7; + +#else +#error Destination should be single precision or double precision! +#endif // end destination precision + +// End of specialization parameters. Two helper routines for conversion to and +// from the representation of floating-point data as integer values follow. + +static __inline src_rep_t srcToRep(src_t x) { + const union { + src_t f; + src_rep_t i; + } rep = {.f = x}; + return rep.i; +} + +static __inline dst_t dstFromRep(dst_rep_t x) { + const union { + dst_t f; + dst_rep_t i; + } rep = {.i = x}; + return rep.f; +} + +#endif // FP_TRUNC_HEADER diff --git a/wasmrt/llvm-builtins/builtins/fp_trunc_impl.inc b/wasmrt/llvm-builtins/builtins/fp_trunc_impl.inc new file mode 100644 index 0000000..6662be7 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/fp_trunc_impl.inc @@ -0,0 +1,132 @@ +//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a fairly generic conversion from a wider to a narrower +// IEEE-754 floating-point type in the default (round to nearest, ties to even) +// rounding mode. The constants and types defined following the includes below +// parameterize the conversion. +// +// This routine can be trivially adapted to support conversions to +// half-precision or from quad-precision. It does not support types that don't +// use the usual IEEE-754 interchange formats; specifically, some work would be +// needed to adapt it to (for example) the Intel 80-bit format or PowerPC +// double-double format. +// +// Note please, however, that this implementation is only intended to support +// *narrowing* operations; if you need to convert to a *wider* floating-point +// type (e.g. float -> double), then this routine will not do what you want it +// to. +// +// It also requires that integer types at least as large as both formats +// are available on the target platform; this may pose a problem when trying +// to add support for quad on some 32-bit systems, for example. +// +// Finally, the following assumptions are made: +// +// 1. Floating-point types and integer types have the same endianness on the +// target platform. +// +// 2. Quiet NaNs, if supported, are indicated by the leading bit of the +// significand field being set. +// +//===----------------------------------------------------------------------===// + +#include "fp_trunc.h" + +static __inline dst_t __truncXfYf2__(src_t a) { + // Various constants whose values follow from the type parameters. + // Any reasonable optimizer will fold and propagate all of these. 
+ const int srcBits = sizeof(src_t) * CHAR_BIT; + const int srcExpBits = srcBits - srcSigBits - 1; + const int srcInfExp = (1 << srcExpBits) - 1; + const int srcExpBias = srcInfExp >> 1; + + const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits; + const src_rep_t srcSignificandMask = srcMinNormal - 1; + const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits; + const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits); + const src_rep_t srcAbsMask = srcSignMask - 1; + const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1; + const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1); + const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1); + const src_rep_t srcNaNCode = srcQNaN - 1; + + const int dstBits = sizeof(dst_t) * CHAR_BIT; + const int dstExpBits = dstBits - dstSigBits - 1; + const int dstInfExp = (1 << dstExpBits) - 1; + const int dstExpBias = dstInfExp >> 1; + + const int underflowExponent = srcExpBias + 1 - dstExpBias; + const int overflowExponent = srcExpBias + dstInfExp - dstExpBias; + const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits; + const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits; + + const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1); + const dst_rep_t dstNaNCode = dstQNaN - 1; + + // Break a into a sign and representation of the absolute value. + const src_rep_t aRep = srcToRep(a); + const src_rep_t aAbs = aRep & srcAbsMask; + const src_rep_t sign = aRep & srcSignMask; + dst_rep_t absResult; + + if (aAbs - underflow < aAbs - overflow) { + // The exponent of a is within the range of normal numbers in the + // destination format. We can convert by simply right-shifting with + // rounding and adjusting the exponent. + absResult = aAbs >> (srcSigBits - dstSigBits); + absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits; + + const src_rep_t roundBits = aAbs & roundMask; + // Round to nearest. 
+ if (roundBits > halfway) + absResult++; + // Tie to even. + else if (roundBits == halfway) + absResult += absResult & 1; + } else if (aAbs > srcInfinity) { + // a is NaN. + // Conjure the result by beginning with infinity, setting the qNaN + // bit and inserting the (truncated) trailing NaN field. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + absResult |= dstQNaN; + absResult |= + ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode; + } else if (aAbs >= overflow) { + // a overflows to infinity. + absResult = (dst_rep_t)dstInfExp << dstSigBits; + } else { + // a underflows on conversion to the destination type or is an exact + // zero. The result may be a denormal or zero. Extract the exponent + // to get the shift amount for the denormalization. + const int aExp = aAbs >> srcSigBits; + const int shift = srcExpBias - dstExpBias - aExp + 1; + + const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal; + + // Right shift by the denormalization amount with sticky. + if (shift > srcSigBits) { + absResult = 0; + } else { + const bool sticky = (significand << (srcBits - shift)) != 0; + src_rep_t denormalizedSignificand = significand >> shift | sticky; + absResult = denormalizedSignificand >> (srcSigBits - dstSigBits); + const src_rep_t roundBits = denormalizedSignificand & roundMask; + // Round to nearest + if (roundBits > halfway) + absResult++; + // Ties to even + else if (roundBits == halfway) + absResult += absResult & 1; + } + } + + // Apply the signbit to the absolute value. 
+ const dst_rep_t result = absResult | sign >> (srcBits - dstBits); + return dstFromRep(result); +} diff --git a/wasmrt/llvm-builtins/builtins/gcc_personality_v0.c b/wasmrt/llvm-builtins/builtins/gcc_personality_v0.c new file mode 100644 index 0000000..ef63a5f --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/gcc_personality_v0.c @@ -0,0 +1,257 @@ +//===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <stddef.h> + +#include <unwind.h> +#if defined(__arm__) && !defined(__ARM_DWARF_EH__) && \ + !defined(__USING_SJLJ_EXCEPTIONS__) +// When building with older compilers (e.g. clang <3.9), it is possible that we +// have a version of unwind.h which does not provide the EHABI declarations +// which are required for the C personality to conform to the specification. In +// order to provide forward compatibility for such compilers, we re-declare the +// necessary interfaces in the helper to permit a standalone compilation of the +// builtins (which contains the C unwinding personality for historical reasons).
+#include "unwind-ehabi-helpers.h" +#endif + +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) +#include <windows.h> +#include <winnt.h> + +EXCEPTION_DISPOSITION _GCC_specific_handler(PEXCEPTION_RECORD, void *, PCONTEXT, + PDISPATCHER_CONTEXT, + _Unwind_Personality_Fn); +#endif + +// Pointer encodings documented at: +// http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html + +#define DW_EH_PE_omit 0xff // no data follows + +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 // gcc extension + +// read a uleb128 encoded value and advance pointer +static size_t readULEB128(const uint8_t **data) { + size_t result = 0; + size_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + do { + byte = *p++; + result |= (size_t)(byte & 0x7f) << shift; // widen first: shifting the promoted int overflows once shift reaches 28 + shift += 7; + } while (byte & 0x80); + *data = p; + return result; +} + +// read a pointer encoded value and advance pointer +static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) { + const uint8_t *p = *data; + uintptr_t result = 0; + + if (encoding == DW_EH_PE_omit) + return 0; + + // first get value + switch (encoding & 0x0F) { + case DW_EH_PE_absptr: + result = *((const uintptr_t *)p); + p += sizeof(uintptr_t); + break; + case DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + case DW_EH_PE_udata2: + result = *((const uint16_t *)p); + p += sizeof(uint16_t); + break; + case DW_EH_PE_udata4: + result = *((const uint32_t *)p); + p += sizeof(uint32_t); + break; + case DW_EH_PE_udata8: + result = *((const uint64_t *)p); + p += sizeof(uint64_t); + break; + case DW_EH_PE_sdata2: + result = *((const
int16_t *)p); + p += sizeof(int16_t); + break; + case DW_EH_PE_sdata4: + result = *((const int32_t *)p); + p += sizeof(int32_t); + break; + case DW_EH_PE_sdata8: + result = *((const int64_t *)p); + p += sizeof(int64_t); + break; + case DW_EH_PE_sleb128: + default: + // not supported + compilerrt_abort(); + break; + } + + // then add relative offset + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + // do nothing + break; + case DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case DW_EH_PE_textrel: + case DW_EH_PE_datarel: + case DW_EH_PE_funcrel: + case DW_EH_PE_aligned: + default: + // not supported + compilerrt_abort(); + break; + } + + // then apply indirection + if (encoding & DW_EH_PE_indirect) { + result = *((const uintptr_t *)result); + } + + *data = p; + return result; +} + +#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ + !defined(__ARM_DWARF_EH__) && !defined(__SEH__) +#define USING_ARM_EHABI 1 +_Unwind_Reason_Code __gnu_unwind_frame(struct _Unwind_Exception *, + struct _Unwind_Context *); +#endif + +static inline _Unwind_Reason_Code +continueUnwind(struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) { +#if USING_ARM_EHABI + // On ARM EHABI the personality routine is responsible for actually + // unwinding a single stack frame before returning (ARM EHABI Sec. 6.1). + if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK) + return _URC_FAILURE; +#endif + return _URC_CONTINUE_UNWIND; +} + +// The C compiler makes references to __gcc_personality_v0 in +// the dwarf unwind information for translation units that use +// __attribute__((cleanup(xx))) on local variables. +// This personality routine is called by the system unwinder +// on each frame as the stack is unwound during a C++ exception +// throw through a C function compiled with -fexceptions. 
+#if __USING_SJLJ_EXCEPTIONS__ +// the setjump-longjump based exceptions personality routine has a +// different name +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_sj0( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) +#elif USING_ARM_EHABI +// The ARM EHABI personality routine has a different signature. +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( + _Unwind_State state, struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) +#elif defined(__SEH__) +static _Unwind_Reason_Code __gcc_personality_imp( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) +#else +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) +#endif +{ + // Since C does not have catch clauses, there is nothing to do during + // phase 1 (the search phase). +#if USING_ARM_EHABI + // After resuming from a cleanup we should also continue on to the next + // frame straight away. + if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING) +#else + if (actions & _UA_SEARCH_PHASE) +#endif + return continueUnwind(exceptionObject, context); + + // There is nothing to do if there is no LSDA for this frame. + const uint8_t *lsda = (uint8_t *)_Unwind_GetLanguageSpecificData(context); + if (lsda == (uint8_t *)0) + return continueUnwind(exceptionObject, context); + + uintptr_t pc = (uintptr_t)_Unwind_GetIP(context) - 1; + uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + + // Parse LSDA header. 
+ uint8_t lpStartEncoding = *lsda++; + if (lpStartEncoding != DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + uint8_t ttypeEncoding = *lsda++; + if (ttypeEncoding != DW_EH_PE_omit) { + readULEB128(&lsda); + } + // Walk call-site table looking for range that includes current PC. + uint8_t callSiteEncoding = *lsda++; + size_t callSiteTableLength = readULEB128(&lsda); + const uint8_t *callSiteTableStart = lsda; + const uint8_t *callSiteTableEnd = callSiteTableStart + callSiteTableLength; + const uint8_t *p = callSiteTableStart; + while (p < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&p, callSiteEncoding); + size_t length = readEncodedPointer(&p, callSiteEncoding); + size_t landingPad = readEncodedPointer(&p, callSiteEncoding); + readULEB128(&p); // action value not used for C code + if (landingPad == 0) + continue; // no landing pad for this entry + if ((start <= pcOffset) && (pcOffset < (start + length))) { + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. The landing pad is created by the compiler + // to take two parameters in registers. + _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); + _Unwind_SetIP(context, (funcStart + landingPad)); + return _URC_INSTALL_CONTEXT; + } + } + + // No landing pad found, continue unwinding. 
+ return continueUnwind(exceptionObject, context); +} + +#if defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__) +COMPILER_RT_ABI EXCEPTION_DISPOSITION +__gcc_personality_seh0(PEXCEPTION_RECORD ms_exc, void *this_frame, + PCONTEXT ms_orig_context, PDISPATCHER_CONTEXT ms_disp) { + return _GCC_specific_handler(ms_exc, this_frame, ms_orig_context, ms_disp, + __gcc_personality_imp); +} +#endif diff --git a/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_abi1.S b/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_abi1.S new file mode 100644 index 0000000..23fed01 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_abi1.S @@ -0,0 +1,102 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Functions that implement common sequences in function prologues and epilogues +// used to save code size + + .macro FUNCTION_BEGIN name + .text + .globl \name + .type \name, @function + .falign +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + .macro FALLTHROUGH_TAIL_CALL name0 name1 + .size \name0, . - \name0 + .globl \name1 + .type \name1, @function + .falign +\name1: + .endm + + + + +// Save r25:24 at fp+#-8 and r27:26 at fp+#-16. + + + + +// The compiler knows that the __save_* functions clobber LR. No other +// registers should be used without informing the compiler. + +// Since we can only issue one store per packet, we don't hurt performance by +// simply jumping to the right point in this sequence of stores. 
+ +FUNCTION_BEGIN __save_r24_through_r27 + memd(fp+#-16) = r27:26 +FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25 + { + memd(fp+#-8) = r25:24 + jumpr lr + } +FUNCTION_END __save_r24_through_r25 + + + + +// For each of the *_before_tailcall functions, jumpr lr is executed in parallel +// with deallocframe. That way, the return gets the old value of lr, which is +// where these functions need to return, and at the same time, lr gets the value +// it needs going into the tail call. + +FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall + r27:26 = memd(fp+#-16) +FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall + { + r25:24 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall + + + + +// Here we use the extra load bandwidth to restore LR early, allowing the return +// to occur in parallel with the deallocframe. + +FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe + { + lr = memw(fp+#4) + r27:26 = memd(fp+#-16) + } + { + r25:24 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r24_through_r27_and_deallocframe + + + + +// Here the load bandwidth is maximized. + +FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe + { + r25:24 = memd(fp+#-8) + deallocframe + } + jumpr lr +FUNCTION_END __restore_r24_through_r25_and_deallocframe diff --git a/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_abi2.S b/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_abi2.S new file mode 100644 index 0000000..3b85aea --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_abi2.S @@ -0,0 +1,267 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Functions that implement common sequences in function prologues and epilogues +// used to save code size + + .macro FUNCTION_BEGIN name + .p2align 2 + .section .text.\name,"ax",@progbits + .globl \name + .type \name, @function +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + .macro FALLTHROUGH_TAIL_CALL name0 name1 + .p2align 2 + .size \name0, . - \name0 + .globl \name1 + .type \name1, @function +\name1: + .endm + + + + +// Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at +// fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48. +// The compiler knows that the __save_* functions clobber LR. No other +// registers should be used without informing the compiler. + +FUNCTION_BEGIN __save_r16_through_r27 + { + memd(fp+#-48) = r27:26 + memd(fp+#-40) = r25:24 + } + { + memd(fp+#-32) = r23:22 + memd(fp+#-24) = r21:20 + } + { + memd(fp+#-16) = r19:18 + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r27 + +FUNCTION_BEGIN __save_r16_through_r25 + { + memd(fp+#-40) = r25:24 + memd(fp+#-32) = r23:22 + } + { + memd(fp+#-24) = r21:20 + memd(fp+#-16) = r19:18 + } + { + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r25 + +FUNCTION_BEGIN __save_r16_through_r23 + { + memd(fp+#-32) = r23:22 + memd(fp+#-24) = r21:20 + } + { + memd(fp+#-16) = r19:18 + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r23 + +FUNCTION_BEGIN __save_r16_through_r21 + { + memd(fp+#-24) = r21:20 + memd(fp+#-16) = r19:18 + } + { + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r21 + +FUNCTION_BEGIN __save_r16_through_r19 + { + memd(fp+#-16) = r19:18 + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END __save_r16_through_r19 + +FUNCTION_BEGIN __save_r16_through_r17 + { + memd(fp+#-8) = r17:16 + jumpr lr + } +FUNCTION_END 
__save_r16_through_r17 + +// For each of the *_before_tailcall functions, jumpr lr is executed in parallel +// with deallocframe. That way, the return gets the old value of lr, which is +// where these functions need to return, and at the same time, lr gets the value +// it needs going into the tail call. + + +FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall + r27:26 = memd(fp+#-48) + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall + { + r23:22 = memd(fp+#-32) + r21:20 = memd(fp+#-24) + } + r19:18 = memd(fp+#-16) + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall + + +FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r21_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall + r19:18 = memd(fp+#-16) + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall + +FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall + { + r17:16 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall + +
+FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe + r27:26 = memd(fp+#-48) + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r27_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe + { + r25:24 = memd(fp+#-40) + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r25_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe + { + r23:22 = memd(fp+#-32) + } + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r23_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe + { + r21:20 = memd(fp+#-24) + r19:18 = memd(fp+#-16) + } + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r21_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe + { + r19:18 = memd(fp+#-16) + r17:16 = memd(fp+#-8) + } + { + dealloc_return + } +FUNCTION_END __restore_r16_through_r19_and_deallocframe + +FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe + { + r17:16 = memd(fp+#-8) + dealloc_return + } +FUNCTION_END __restore_r16_through_r17_and_deallocframe + +FUNCTION_BEGIN __deallocframe + dealloc_return +FUNCTION_END __deallocframe diff --git a/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_legacy.S b/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_legacy.S new file mode 100644 index 0000000..8a60445 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/hexagon/common_entry_exit_legacy.S @@ -0,0 +1,156 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + + +// Functions that implement common sequences in function prologues and epilogues +// used to save code size + + .macro FUNCTION_BEGIN name + .text + .globl \name + .type \name, @function + .falign +\name: + .endm + + .macro FUNCTION_END name + .size \name, . - \name + .endm + + .macro FALLTHROUGH_TAIL_CALL name0 name1 + .size \name0, . - \name0 + .globl \name1 + .type \name1, @function + .falign +\name1: + .endm + + + + +// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at +// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. + + + + +// The compiler knows that the __save_* functions clobber LR. No other +// registers should be used without informing the compiler. + +// Since we can only issue one store per packet, we don't hurt performance by +// simply jumping to the right point in this sequence of stores. + +FUNCTION_BEGIN __save_r27_through_r16 + memd(fp+#-48) = r17:16 +FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18 + memd(fp+#-40) = r19:18 +FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20 + memd(fp+#-32) = r21:20 +FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22 + memd(fp+#-24) = r23:22 +FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24 + memd(fp+#-16) = r25:24 + { + memd(fp+#-8) = r27:26 + jumpr lr + } +FUNCTION_END __save_r27_through_r24 + + + + +// For each of the *_before_sibcall functions, jumpr lr is executed in parallel +// with deallocframe. That way, the return gets the old value of lr, which is +// where these functions need to return, and at the same time, lr gets the value +// it needs going into the sibcall. 
+ +FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall + { + r21:20 = memd(fp+#-32) + r23:22 = memd(fp+#-24) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall + { + r25:24 = memd(fp+#-16) + jump __restore_r27_through_r26_and_deallocframe_before_sibcall + } +FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall + + + + +FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall + r17:16 = memd(fp+#-48) +FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall + { + r19:18 = memd(fp+#-40) + r21:20 = memd(fp+#-32) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall + { + r23:22 = memd(fp+#-24) + r25:24 = memd(fp+#-16) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall + { + r27:26 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall + + + + +// Here we use the extra load bandwidth to restore LR early, allowing the return +// to occur in parallel with the deallocframe. + +FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe + { + r17:16 = memd(fp+#-48) + r19:18 = memd(fp+#-40) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe + { + r21:20 = memd(fp+#-32) + r23:22 = memd(fp+#-24) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe + { + lr = memw(fp+#4) + r25:24 = memd(fp+#-16) + } + { + r27:26 = memd(fp+#-8) + deallocframe + jumpr lr + } +FUNCTION_END __restore_r27_through_r24_and_deallocframe + + + + +// Here the load bandwidth is maximized for all three functions. 
+ +FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe + { + r19:18 = memd(fp+#-40) + r21:20 = memd(fp+#-32) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe + { + r23:22 = memd(fp+#-24) + r25:24 = memd(fp+#-16) + } +FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe + { + r27:26 = memd(fp+#-8) + deallocframe + } + jumpr lr +FUNCTION_END __restore_r27_through_r26_and_deallocframe diff --git a/wasmrt/llvm-builtins/builtins/hexagon/dfaddsub.S b/wasmrt/llvm-builtins/builtins/hexagon/dfaddsub.S new file mode 100644 index 0000000..1b0d345 --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/hexagon/dfaddsub.S @@ -0,0 +1,396 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Double Precision Add/Subtract + +#define A r1:0 +#define AH r1 +#define AL r0 +#define B r3:2 +#define BH r3 +#define BL r2 + +#define EXPA r4 +#define EXPB r5 +#define EXPB_A r5:4 + +#define ZTMP r7:6 +#define ZTMPH r7 +#define ZTMPL r6 + +#define ATMP r13:12 +#define ATMPH r13 +#define ATMPL r12 + +#define BTMP r9:8 +#define BTMPH r9 +#define BTMPL r8 + +#define ATMP2 r11:10 +#define ATMP2H r11 +#define ATMP2L r10 + +#define EXPDIFF r15 +#define EXTRACTOFF r14 +#define EXTRACTAMT r15:14 + +#define TMP r28 + +#define MANTBITS 52 +#define HI_MANTBITS 20 +#define EXPBITS 11 +#define BIAS 1024 +#define MANTISSA_TO_INT_BIAS 52 +#define SR_BIT_INEXACT 5 + +#ifndef SR_ROUND_OFF +#define SR_ROUND_OFF 22 +#endif + +#define NORMAL p3 +#define BIGB p2 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG)
.global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + + .text + .global __hexagon_adddf3 + .global __hexagon_subdf3 + .type __hexagon_adddf3, @function + .type __hexagon_subdf3, @function + +Q6_ALIAS(adddf3) +FAST_ALIAS(adddf3) +FAST2_ALIAS(adddf3) +Q6_ALIAS(subdf3) +FAST_ALIAS(subdf3) +FAST2_ALIAS(subdf3) + + .p2align 5 +__hexagon_adddf3: + { + EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) + EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS) + ATMP = combine(##0x20000000,#0) + } + { + NORMAL = dfclass(A,#2) + NORMAL = dfclass(B,#2) + BTMP = ATMP + BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A? + } + { + if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code + if (BIGB) A = B // if B >> A, swap A and B + if (BIGB) B = A // If B >> A, swap A and B + if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents + } + { + ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62 + BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62 + EXPDIFF = sub(EXPA,EXPB) + ZTMP = combine(#62,#1) + } +#undef BIGB +#undef NORMAL +#define B_POS p3 +#define A_POS p2 +#define NO_STICKIES p1 +.Ladd_continue: + { + EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60, + // will collapse to sticky bit + ATMP2 = neg(ATMP) + A_POS = cmp.gt(AH,#-1) + EXTRACTOFF = #0 + } + { + if (!A_POS) ATMP = ATMP2 + ATMP2 = extractu(BTMP,EXTRACTAMT) + BTMP = ASR(BTMP,EXPDIFF) +#undef EXTRACTAMT +#undef EXPDIFF +#undef EXTRACTOFF +#define ZERO r15:14 + ZERO = #0 + } + { + NO_STICKIES = cmp.eq(ATMP2,ZERO) + if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL) + EXPB = add(EXPA,#-BIAS-60) + B_POS = cmp.gt(BH,#-1) + } + { + ATMP = add(ATMP,BTMP) // ADD!!! + ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
+ ZTMP = combine(#54,##2045) + } + { + p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation + p0 = !cmp.gtu(EXPA,ZTMPL) + if (!p0.new) jump:nt .Ladd_ovf_unf + if (!B_POS) ATMP = ATMP2 // if B neg, pick difference + } + { + A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice! + p0 = cmp.eq(ATMPH,#0) + p0 = cmp.eq(ATMPL,#0) + if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly? + } + { + AH += asl(EXPB,#HI_MANTBITS) + jumpr r31 + } + .falign +__hexagon_subdf3: + { + BH = togglebit(BH,#31) // flip the sign bit of B so that A - B is computed as A + (-B) + jump __qdsp_adddf3 // Q6_ALIAS name for __hexagon_adddf3 + } + + + .falign +.Ladd_zero: + // True zero, full cancellation + // +0 unless round towards negative infinity + { + TMP = USR + A = #0 + BH = #1 + } + { + TMP = extractu(TMP,#2,#22) // rounding bits; the literal 22 equals the default SR_ROUND_OFF used by the other paths + BH = asl(BH,#31) + } + { + p0 = cmp.eq(TMP,#2) + if (p0.new) AH = xor(AH,BH) + jumpr r31 + } + .falign +.Ladd_ovf_unf: + // Overflow or Denormal is possible + // Good news: Underflow flag is not possible! + + // ATMP has 2's complement value + // + // EXPA has A's exponent, EXPB has EXPA-BIAS-60 + // + // Convert, extract exponent, add adjustment. + // If > 2046, overflow + // If <= 0, denormal + // + // Note that we've not done our zero check yet, so do that too + + { + A = convert_d2df(ATMP) + p0 = cmp.eq(ATMPH,#0) + p0 = cmp.eq(ATMPL,#0) + if (p0.new) jump:nt .Ladd_zero + } + { + TMP = extractu(AH,#EXPBITS,#HI_MANTBITS) + AH += asl(EXPB,#HI_MANTBITS) + } + { + EXPB = add(EXPB,TMP) + B = combine(##0x00100000,#0) + } + { + p0 = cmp.gt(EXPB,##BIAS+BIAS-2) + if (p0.new) jump:nt .Ladd_ovf + } + { + p0 = cmp.gt(EXPB,#0) + if (p0.new) jumpr:t r31 + TMP = sub(#1,EXPB) + } + { + B = insert(A,#MANTBITS,#0) + A = ATMP + } + { + B = lsr(B,TMP) + } + { + A = insert(B,#63,#0) + jumpr r31 + } + .falign +.Ladd_ovf: + // We get either max finite value or infinity.
Either way, overflow+inexact + { + A = ATMP // 2's complement value + TMP = USR + ATMP = combine(##0x7fefffff,#-1) // positive max finite + } + { + EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits + TMP = or(TMP,#0x28) // inexact + overflow + BTMP = combine(##0x7ff00000,#0) // positive infinity + } + { + USR = TMP + EXPB ^= lsr(AH,#31) // Does sign match rounding? + TMP = EXPB // unmodified rounding mode + } + { + p0 = !cmp.eq(TMP,#1) // If not round-to-zero and + p0 = !cmp.eq(EXPB,#2) // Not rounding the other way, + if (p0.new) ATMP = BTMP // we should get infinity + } + { + A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign + } + { + p0 = dfcmp.eq(A,A) + jumpr r31 + } + +.Ladd_abnormal: + { + ATMP = extractu(A,#63,#0) // strip off sign + BTMP = extractu(B,#63,#0) // strip off sign + } + { + p3 = cmp.gtu(ATMP,BTMP) + if (!p3.new) A = B // sort values + if (!p3.new) B = A // sort values + } + { + // Any NaN --> NaN, possibly raise invalid if sNaN + p0 = dfclass(A,#0x0f) // A not NaN?
+ if (!p0.new) jump:nt .Linvalid_nan_add + if (!p3) ATMP = BTMP + if (!p3) BTMP = ATMP + } + { + // Infinity + non-infinity number is infinity + // Infinity + infinity --> inf or nan + p1 = dfclass(A,#0x08) // A is infinity + if (p1.new) jump:nt .Linf_add + } + { + p2 = dfclass(B,#0x01) // B is zero + if (p2.new) jump:nt .LB_zero // so return A or special 0+0 + ATMP = #0 + } + // We are left with adding one or more subnormals + { + p0 = dfclass(A,#4) + if (p0.new) jump:nt .Ladd_two_subnormal + ATMP = combine(##0x20000000,#0) + } + { + EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS) + EXPB = #1 + // BTMP already ABS(B) + BTMP = asl(BTMP,#EXPBITS-2) + } +#undef ZERO +#define EXTRACTOFF r14 +#define EXPDIFF r15 + { + ATMP = insert(A,#MANTBITS,#EXPBITS-2) + EXPDIFF = sub(EXPA,EXPB) + ZTMP = combine(#62,#1) + jump .Ladd_continue + } + +.Ladd_two_subnormal: + { + ATMP = extractu(A,#63,#0) + BTMP = extractu(B,#63,#0) + } + { + ATMP = neg(ATMP) + BTMP = neg(BTMP) + p0 = cmp.gt(AH,#-1) + p1 = cmp.gt(BH,#-1) + } + { + if (p0) ATMP = A + if (p1) BTMP = B + } + { + ATMP = add(ATMP,BTMP) + } + { + BTMP = neg(ATMP) + p0 = cmp.gt(ATMPH,#-1) + B = #0 + } + { + if (!p0) A = BTMP + if (p0) A = ATMP + BH = ##0x80000000 + } + { + if (!p0) AH = or(AH,BH) + p0 = dfcmp.eq(A,B) + if (p0.new) jump:nt .Lzero_plus_zero + } + { + jumpr r31 + } + +.Linvalid_nan_add: + { + TMP = convert_df2sf(A) // will generate invalid if sNaN + p0 = dfclass(B,#0x0f) // if B is not NaN + if (p0.new) B = A // make it whatever A is + } + { + BL = convert_df2sf(B) // will generate invalid if sNaN + A = #-1 + jumpr r31 + } + .falign +.LB_zero: + { + p0 = dfcmp.eq(ATMP,A) // is A also zero? + if (!p0.new) jumpr:t r31 // If not, just return A + } + // 0 + 0 is special + // if equal integral values, they have the same sign, which is fine for all rounding + // modes.
+ // If unequal in sign, we get +0 for all rounding modes except round down +.Lzero_plus_zero: + { + p0 = cmp.eq(A,B) + if (p0.new) jumpr:t r31 + } + { + TMP = USR + } + { + TMP = extractu(TMP,#2,#SR_ROUND_OFF) + A = #0 + } + { + p0 = cmp.eq(TMP,#2) + if (p0.new) AH = ##0x80000000 + jumpr r31 + } +.Linf_add: + // adding infinities is only OK if they are equal + { + p0 = !cmp.eq(AH,BH) // Do they have different signs + p0 = dfclass(B,#8) // And is B also infinite? + if (!p0.new) jumpr:t r31 // If not, just a normal inf + } + { + BL = ##0x7f800001 // sNAN + } + { + A = convert_sf2df(BL) // trigger invalid, set NaN + jumpr r31 + } +END(__hexagon_adddf3) diff --git a/wasmrt/llvm-builtins/builtins/hexagon/dfdiv.S b/wasmrt/llvm-builtins/builtins/hexagon/dfdiv.S new file mode 100644 index 0000000..202965e --- /dev/null +++ b/wasmrt/llvm-builtins/builtins/hexagon/dfdiv.S @@ -0,0 +1,491 @@ +//===----------------------Hexagon builtin routine ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Double Precision Divide + +#define A r1:0 +#define AH r1 +#define AL r0 + +#define B r3:2 +#define BH r3 +#define BL r2 + +#define Q r5:4 +#define QH r5 +#define QL r4 + +#define PROD r7:6 +#define PRODHI r7 +#define PRODLO r6 + +#define SFONE r8 +#define SFDEN r9 +#define SFERROR r10 +#define SFRECIP r11 + +#define EXPBA r13:12 +#define EXPB r13 +#define EXPA r12 + +#define REMSUB2 r15:14 + + + +#define SIGN r28 + +#define Q_POSITIVE p3 +#define NORMAL p2 +#define NO_OVF_UNF p1 +#define P_TMP p0 + +#define RECIPEST_SHIFT 3 +#define QADJ 61 + +#define DFCLASS_NORMAL 0x02 +#define DFCLASS_NUMBER 0x0F +#define DFCLASS_INFINITE 0x08 +#define DFCLASS_ZERO 0x01 +#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) +#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) + +#define DF_MANTBITS 52 +#define DF_EXPBITS 11 +#define SF_MANTBITS 23 +#define SF_EXPBITS 8 +#define DF_BIAS 0x3ff + +#define SR_ROUND_OFF 22 + +#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG +#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG +#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG +#define END(TAG) .size TAG,.-TAG + + .text + .global __hexagon_divdf3 + .type __hexagon_divdf3,@function + Q6_ALIAS(divdf3) + FAST_ALIAS(divdf3) + FAST2_ALIAS(divdf3) + .p2align 5 +__hexagon_divdf3: + { + NORMAL = dfclass(A,#DFCLASS_NORMAL) + NORMAL = dfclass(B,#DFCLASS_NORMAL) + EXPBA = combine(BH,AH) + SIGN = xor(AH,BH) + } +#undef A +#undef AH +#undef AL +#undef B +#undef BH +#undef BL +#define REM r1:0 +#define REMHI r1 +#define REMLO r0 +#define DENOM r3:2 +#define DENOMHI r3 +#define DENOMLO r2 + { + if (!NORMAL) jump .Ldiv_abnormal + PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) + SFONE = ##0x3f800001 
+ } + { + SFDEN = or(SFONE,PRODLO) + EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) + EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) + Q_POSITIVE = cmp.gt(SIGN,#-1) + } +#undef SIGN +#define ONE r28 +.Ldenorm_continue: + { + SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN) + SFERROR = and(SFONE,#-2) + ONE = #1 + EXPA = sub(EXPA,EXPB) + } +#undef EXPB +#define RECIPEST r13 + { + SFERROR -= sfmpy(SFRECIP,SFDEN):lib + REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) + RECIPEST = ##0x00800000 << RECIPEST_SHIFT + } + { + SFRECIP += sfmpy(SFRECIP,SFERROR):lib + DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) + SFERROR = and(SFONE,#-2) + } + { + SFERROR -= sfmpy(SFRECIP,SFDEN):lib + QH = #-DF_BIAS+1 + QL = #DF_BIAS-1 + } + { + SFRECIP += sfmpy(SFRECIP,SFERROR):lib + NO_OVF_UNF = cmp.gt(EXPA,QH) + NO_OVF_UNF = !cmp.gt(EXPA,QL) + } + { + RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) + Q = #0 + EXPA = add(EXPA,#-QADJ) + } +#undef SFERROR +#undef SFRECIP +#define TMP r10 +#define TMP1 r11 + { + RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) + } + +#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \ + { \ + PROD = mpyu(RECIPEST,REMHI); \ + REM = asl(REM,# ## ( REMSHIFT )); \ + }; \ + { \ + PRODLO = # ## 0; \ + REM -= mpyu(PRODHI,DENOMLO); \ + REMSUB2 = mpyu(PRODHI,DENOMHI); \ + }; \ + { \ + Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \ + REM -= asl(REMSUB2, # ## 32); \ + EXTRA \ + } + + + DIV_ITER1B(ASL,14,15,) + DIV_ITER1B(ASR,1,15,) + DIV_ITER1B(ASR,16,15,) + DIV_ITER1B(ASR,31,15,PROD=# ( 0 );) + +#undef REMSUB2 +#define TMPPAIR r15:14 +#define TMPPAIRHI r15 +#define TMPPAIRLO r14 +#undef RECIPEST +#define EXPB r13 + { + // compare or sub with carry + TMPPAIR = sub(REM,DENOM) + P_TMP = cmp.gtu(DENOM,REM) + // set up amt to add to q + if (!P_TMP.new) PRODLO = #2 + } + { + Q = add(Q,PROD) + if (!P_TMP) REM = TMPPAIR + TMPPAIR = #0 + } + { + P_TMP = cmp.eq(REM,TMPPAIR) + if (!P_TMP.new) QL = or(QL,ONE) + } + { + PROD = neg(Q) + } + { + if (!Q_POSITIVE) Q = PROD + } 
+#undef REM +#undef REMHI +#undef REMLO +#undef DENOM +#undef DENOMLO +#undef DENOMHI +#define A r1:0 +#define AH r1 +#define AL r0 +#define B r3:2 +#define BH r3 +#define BL r2 + { + A = convert_d2df(Q) + if (!NO_OVF_UNF) jump .Ldiv_ovf_unf + } + { + AH += asl(EXPA,#DF_MANTBITS-32) + jumpr r31 + } + +.Ldiv_ovf_unf: + { + AH += asl(EXPA,#DF_MANTBITS-32) + EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) + } + { + PROD = abs(Q) + EXPA = add(EXPA,EXPB) + } + { + P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow + if (P_TMP.new) jump:nt .Ldiv_ovf + } + { + P_TMP = cmp.gt(EXPA,#0) + if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... + } + // Underflow + // We know what the infinite range exponent should be (EXPA) + // Q is 2's complement, PROD is abs(Q) + // Normalize Q, shift right, add a high bit, convert, change exponent + +#define FUDGE1 7 // how much to shift right +#define FUDGE2 4 // how many guard/round to keep at lsbs + + { + EXPB = add(clb(PROD),#-1) // doesn't need to be added in since + EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent + TMP = USR + TMP1 = #63 + } + { + EXPB = min(EXPA,TMP1) + TMP1 = or(TMP,#0x030) + PROD = asl(PROD,EXPB) + EXPA = #0 + } + { + TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out + PROD = lsr(PROD,EXPB) // shift out bits + B = #1 + } + { + P_TMP = cmp.gtu(B,TMPPAIR) + if (!P_TMP.new) PRODLO = or(BL,PRODLO) + PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) + } + { + Q = neg(PROD) + P_TMP = bitsclr(PRODLO,#(1<