diff --git a/.github/workflows/build-all.yml b/.github/workflows/build-all.yml index 22b71a2b..62617826 100644 --- a/.github/workflows/build-all.yml +++ b/.github/workflows/build-all.yml @@ -5,6 +5,7 @@ on: - cron: '0 0 * * *' release: types: [ published ] + workflow_dispatch: jobs: build-tarballs: @@ -39,7 +40,7 @@ jobs: run: ./dist.sh ${{ matrix.target }} - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: tarballs + name: ${{ matrix.target }} path: mold-*.tar.gz diff --git a/.github/workflows/build-x86.yml b/.github/workflows/build-x86.yml index 1866691c..828a9f14 100644 --- a/.github/workflows/build-x86.yml +++ b/.github/workflows/build-x86.yml @@ -3,6 +3,7 @@ name: Build x86 tarball on: push: branches: [ main ] + workflow_dispatch: jobs: build-tarball: @@ -30,7 +31,7 @@ jobs: run: ./dist.sh - name: Upload artifact - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: name: tarball path: mold-*.tar.gz diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3015849f..16e27fac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,40 +1,30 @@ name: CI on: push: - branches: [ main ] pull_request: - branches: [ main ] env: UBSAN_OPTIONS: print_stacktrace=1:halt_on_error=1 jobs: - build-clang: + build-sanitizers: strategy: matrix: target: - # Disable PCH for the default configuration. This prevents relying on implicit includes. 
- - '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On' + - '' - '-DMOLD_USE_ASAN=On' - '-DMOLD_USE_TSAN=On' - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v3 - uses: rui314/setup-mold@staging - - name: install-build-deps - run: sudo ./install-build-deps.sh - - name: ccache - uses: hendrikmuhs/ccache-action@v1 + - run: sudo ./install-build-deps.sh - name: build run: | - echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH - sudo apt-get install -y clang++-12 + sudo apt-get install -y clang-18 clang gcc-multilib gdb dwarfdump zstd mkdir build cd build - cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++-12 ${{ matrix.target }} .. + cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang-18 -DCMAKE_CXX_COMPILER=clang++-18 ${{ matrix.target }} .. cmake --build . -j$(nproc) - - name: test - run: | - cd build - ctest -j$(nproc) + - run: ctest --test-dir build -j$(nproc) - name: archive test results uses: actions/upload-artifact@v3 if: failure() @@ -44,8 +34,8 @@ jobs: build !build/CMakeFiles - build-gcc: - runs-on: ubuntu-20.04 + build-multi-archs: + runs-on: ubuntu-latest container: gcc:11.1.0 steps: - uses: actions/checkout@v3 @@ -72,34 +62,29 @@ jobs: # Install a LoongArch toolchain mkdir /larch - wget -O- -q https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz | tar -C /larch --strip-components=1 --xz -xf - + wget -O- -q https://github.com/loongson/build-tools/releases/download/2024.08.08/x86_64-cross-tools-loongarch64-binutils_2.43-gcc_14.2.0-glibc_2.40.tar.xz | tar -C /larch --strip-components=1 --xz -xf - + cp -r /larch/loongarch64-unknown-linux-gnu/lib/* /larch/target/lib64 ln -sf /larch/target /usr/loongarch64-linux-gnu - cp -r /larch/loongarch64-unknown-linux-gnu/lib/* /usr/loongarch64-linux-gnu/lib64/ - for i in objdump objcopy strip; do + for i in gcc g++ objdump objcopy strip; do ln -sf /larch/bin/loongarch64-unknown-linux-gnu-$i 
/usr/bin/loongarch64-linux-gnu-$i done - echo '/larch/bin/loongarch64-unknown-linux-gnu-gcc -L/larch/loongarch64-unknown-linux-gnu "$@"' > /usr/bin/loongarch64-linux-gnu-gcc - echo '/larch/bin/loongarch64-unknown-linux-gnu-g++ -L/larch/loongarch64-unknown-linux-gnu "$@"' > /usr/bin/loongarch64-linux-gnu-g++ - chmod 755 /usr/bin/loongarch64-linux-gnu-{gcc,g++} - wget -O /usr/local/bin/qemu-loongarch64 -q https://github.com/loongson/build-tools/releases/download/2023.08.08/qemu-loongarch64 chmod 755 /usr/local/bin/qemu-loongarch64 - - name: ccache - uses: hendrikmuhs/ccache-action@v1 + + # Install Intel SDE CPU emulator for CET-related tests + mkdir /sde + wget -O- -q https://downloadmirror.intel.com/813591/sde-external-9.33.0-2024-01-07-lin.tar.xz | tar -C /sde --strip-components=1 --xz -xf - + ln -s /sde/sde /usr/bin - name: build run: | - echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH mkdir build cd build cmake .. cmake --build . -j$(nproc) - - name: test - run: | - cd build - ctest -j$(nproc) + - run: ctest --test-dir build -j$(nproc) - name: archive test results uses: actions/upload-artifact@v3 if: failure() @@ -109,24 +94,38 @@ jobs: build !build/CMakeFiles - build-macos: - runs-on: macos-11 + build-distros: strategy: matrix: - target: - # Disable PCH for the default configuration. This prevents relying on implicit includes. - - '-DCMAKE_DISABLE_PRECOMPILE_HEADERS=On' - - '-DMOLD_USE_ASAN=On' + distro: + - alpine + - archlinux + - fedora + - gentoo/stage3 + - opensuse/tumbleweed + - ubuntu:22.04 + runs-on: ubuntu-latest + container: ${{ matrix.distro }} + steps: + - uses: actions/checkout@v2 + - run: ./install-build-deps.sh + - name: build + run: | + mkdir build + cd build + cmake .. + cmake --build . 
-j$(nproc) + - run: ctest --test-dir build -j$(nproc) + + build-macos: + runs-on: macos-12 steps: - uses: actions/checkout@v3 - - name: ccache - uses: hendrikmuhs/ccache-action@v1 - name: build run: | - echo "/usr/lib/ccache:/usr/local/opt/ccache/libexec" >> $GITHUB_PATH mkdir build cd build - cmake ${{ matrix.target }} .. + cmake .. cmake --build . -j$(sysctl -n hw.physicalcpu) build-windows: @@ -155,5 +154,21 @@ jobs: run: | mkdir build cd build - cmake -GNinja -DMOLD_USE_MIMALLOC=OFF -DMOLD_USE_SYSTEM_TBB=ON -DMOLD_USE_MOLD=OFF .. + cmake -GNinja -DMOLD_USE_MIMALLOC=OFF -DMOLD_USE_SYSTEM_TBB=ON .. cmake --build . -j $(nproc) + + build-freebsd: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Build and test + uses: vmactions/freebsd-vm@v1 + with: + usesh: true + run: | + ./install-build-deps.sh + mkdir build + cd build + cmake .. + cmake --build . -j$(nproc) + ctest -j$(nproc) diff --git a/.github/workflows/update-manpage.yml b/.github/workflows/update-manpage.yml index e4442a72..1107e774 100644 --- a/.github/workflows/update-manpage.yml +++ b/.github/workflows/update-manpage.yml @@ -1,5 +1,3 @@ -# This file is generated by ChatGPT - name: Update manpage on: @@ -8,6 +6,7 @@ on: - 'docs/mold.md' branches: - main + workflow_dispatch: jobs: update-manpage: @@ -18,7 +17,7 @@ jobs: uses: actions/checkout@v2 - name: Install ronn - run: sudo apt-get install -y ronn + run: sudo apt-get update && sudo apt-get install -y ronn - name: Generate mold.1 from mold.md run: ronn --roff docs/mold.md diff --git a/CMakeLists.txt b/CMakeLists.txt index ad31bf03..d6d1500a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,7 +44,7 @@ # features and behave exactly the same. 
cmake_minimum_required(VERSION 3.14) -project(mold VERSION 2.32.1) +project(mold VERSION 2.34.0) include(CMakeDependentOption) include(CheckSymbolExists) @@ -61,6 +61,8 @@ target_compile_features(mold PRIVATE cxx_std_20) if(MINGW) target_link_libraries(mold PRIVATE dl) +else() + target_link_libraries(mold PRIVATE ${CMAKE_DL_LIBS}) endif() if(NOT "${CMAKE_CXX_COMPILER_FRONTEND_VARIANT}" STREQUAL "MSVC") @@ -250,7 +252,7 @@ if(NOT APPLE AND NOT WIN32) # Remove the default `lib` prefix set_target_properties(mold-wrapper PROPERTIES PREFIX "") target_link_libraries(mold-wrapper PRIVATE ${CMAKE_DL_LIBS}) - target_sources(mold-wrapper PRIVATE elf/mold-wrapper.c) + target_sources(mold-wrapper PRIVATE src/mold-wrapper.c) endif() # If atomics doesn't work by default, add -latomic. @@ -275,12 +277,6 @@ if(NOT APPLE AND NOT MSVC) target_link_options(mold PRIVATE -pthread) endif() -# shm_open needs -lrt -find_library(LIBRT rt) -if(LIBRT) - target_link_libraries(mold PRIVATE rt) -endif() - check_symbol_exists(madvise sys/mman.h HAVE_MADVISE) # Create a .cc file containing the current git hash for `mold --version`. @@ -288,15 +284,15 @@ add_custom_target(git_hash COMMAND ${CMAKE_COMMAND} -DSOURCE_DIR=${CMAKE_SOURCE_DIR} -DOUTPUT_FILE=${CMAKE_BINARY_DIR}/git-hash.cc - -P ${CMAKE_SOURCE_DIR}/common/update-git-hash.cmake - DEPENDS common/update-git-hash.cmake + -P ${CMAKE_SOURCE_DIR}/lib/update-git-hash.cmake + DEPENDS lib/update-git-hash.cmake BYPRODUCTS git-hash.cc VERBATIM) add_dependencies(mold git_hash) # Create config.h file -configure_file(common/config.h.in config.h) +configure_file(lib/config.h.in config.h) include_directories(${CMAKE_CURRENT_BINARY_DIR}) # Almost all functions are template in mold which take a target type @@ -309,32 +305,44 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) # on a multicore machine. 
list(APPEND MOLD_ELF_TARGETS X86_64 I386 ARM64 ARM32 RV32LE RV32BE RV64LE RV64BE PPC32 PPC64V1 PPC64V2 - S390X SPARC64 M68K SH4 ALPHA LOONGARCH32 LOONGARCH64) + S390X SPARC64 M68K SH4 LOONGARCH32 LOONGARCH64) list(APPEND MOLD_ELF_TEMPLATE_FILES - elf/arch-loongarch.cc - elf/arch-riscv.cc - elf/cmdline.cc - elf/gc-sections.cc - elf/gdb-index.cc - elf/icf.cc - elf/input-files.cc - elf/input-sections.cc - elf/linker-script.cc - elf/main.cc - elf/mapfile.cc - elf/output-chunks.cc - elf/passes.cc - elf/relocatable.cc - elf/subprocess.cc - elf/thunks.cc - elf/tls.cc + src/arch-loongarch.cc + src/arch-riscv.cc + src/cmdline.cc + src/gc-sections.cc + src/gdb-index.cc + src/icf.cc + src/input-files.cc + src/input-sections.cc + src/linker-script.cc + src/main.cc + src/mapfile.cc + src/output-chunks.cc + src/passes.cc + src/relocatable.cc + src/shrink-sections.cc + src/thunks.cc + src/tls.cc ) if(WIN32 AND NOT MINGW) - list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-win32.cc) + list(APPEND MOLD_ELF_TEMPLATE_FILES src/lto-win32.cc) +else() + list(APPEND MOLD_ELF_TEMPLATE_FILES src/lto-unix.cc) +endif() + +if(WIN32) + list(APPEND MOLD_ELF_TEMPLATE_FILES + src/output-file-win32.cc + src/subprocess-win32.cc + ) else() - list(APPEND MOLD_ELF_TEMPLATE_FILES elf/lto-unix.cc) + list(APPEND MOLD_ELF_TEMPLATE_FILES + src/output-file-unix.cc + src/subprocess-unix.cc + ) endif() function(mold_instantiate_templates SOURCE TARGET) @@ -356,58 +364,48 @@ endforeach() # Add other non-template source files. 
target_sources(mold PRIVATE - common/compress.cc - common/demangle.cc - common/filepath.cc - common/glob.cc - common/hyperloglog.cc - common/malloc.cc - common/multi-glob.cc - common/perf.cc - common/random.cc - common/tar.cc - elf/arch-alpha.cc - elf/arch-arm32.cc - elf/arch-arm64.cc - elf/arch-i386.cc - elf/arch-m68k.cc - elf/arch-ppc32.cc - elf/arch-ppc64v1.cc - elf/arch-ppc64v2.cc - elf/arch-s390x.cc - elf/arch-sh4.cc - elf/arch-sparc64.cc - elf/arch-x86-64.cc - elf/config.cc - elf/elf.cc git-hash.cc + lib/compress.cc + lib/crc32.cc + lib/demangle.cc + lib/filepath.cc + lib/glob.cc + lib/hyperloglog.cc + lib/malloc.cc + lib/multi-glob.cc + lib/perf.cc + lib/random.cc + lib/tar.cc + src/arch-arm32.cc + src/arch-arm64.cc + src/arch-i386.cc + src/arch-m68k.cc + src/arch-ppc32.cc + src/arch-ppc64v1.cc + src/arch-ppc64v2.cc + src/arch-s390x.cc + src/arch-sh4.cc + src/arch-sparc64.cc + src/arch-x86-64.cc + src/config.cc + src/elf.cc third-party/rust-demangle/rust-demangle.c ) if(WIN32) target_sources(mold PRIVATE - common/jobs-win32.cc - common/mapped-file-win32.cc - common/signal-win32.cc + lib/jobs-win32.cc + lib/mapped-file-win32.cc + lib/signal-win32.cc ) else() target_sources(mold PRIVATE - common/jobs-unix.cc - common/mapped-file-unix.cc - common/signal-unix.cc + lib/jobs-unix.cc + lib/mapped-file-unix.cc + lib/signal-unix.cc ) endif() -# Add frequently included header files for pre-compiling. -# target_precompile_headers is supported by CMake 3.16.0 or newer. 
-if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16.0") - # ccache needs this flag along with `sloppiness = pch_defines,time_macros` - # to enable caching - if(NOT MSVC) - target_compile_options(mold PRIVATE -fpch-preprocess) - endif() -endif() - include(CTest) if(BUILD_TESTING) @@ -422,7 +420,7 @@ if(BUILD_TESTING) endif() if(${UNIX}) - add_subdirectory(test/elf) + add_subdirectory(test) endif() endif() diff --git a/README.md b/README.md index 7bcbfa38..c6ddf37b 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,7 @@ free to [file a bug report](https://github.com/rui314/mold/issues). mold supports x86-64, i386, ARM64, ARM32, 64-bit/32-bit little/big-endian RISC-V, 32-bit PowerPC, 64-bit big-endian PowerPC ELFv1, 64-bit little-endian -PowerPC ELFv2, s390x, 64-bit/32-bit LoongArch, SPARC64, m68k, SH-4, and DEC -Alpha. +PowerPC ELFv2, s390x, 64-bit/32-bit LoongArch, SPARC64, m68k, and SH-4. ## Why does linking speed matter? @@ -133,7 +132,7 @@ may be able to remove the `linker = "clang"` line. ```toml [target.x86_64-unknown-linux-gnu] -rustflags = ["-C", "link-arg=-fuse-ld=/path/to/mold"] +rustflags = ["-C", "link-arg=-fuse-ld=mold"] ``` If you want to use mold for all projects, add the above snippet to diff --git a/common/integers.h b/common/integers.h deleted file mode 100644 index 2ad02d0c..00000000 --- a/common/integers.h +++ /dev/null @@ -1,221 +0,0 @@ -// This file defines integral types for file input/output. We need to use -// these types instead of the plain integers (such as uint32_t or int32_t) -// when reading from/writing to an mmap'ed file area for the following -// reasons: -// -// 1. mold is always a cross linker and should not depend on what host it -// is running on. Users should be able to run mold on a big-endian -// SPARC machine to create a little-endian RV64 binary, for example. -// -// 2. Even though data members in all ELF data strucutres are naturally -// aligned, they are not guaranteed to be aligned on memory. 
Because -// archive file (.a file) aligns each member only to a 2 byte boundary, -// anything larger than 2 bytes may be unaligned in an mmap'ed memory. -// Unaligned access is an undefined behavior in C/C++, so we shouldn't -// cast an arbitrary pointer to a uint32_t, for example, to read a -// 32-bits value. -// -// The data types defined in this file don't depend on host byte order and -// don't do unaligned access. - -#pragma once - -#include -#include -#include - -#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define __LITTLE_ENDIAN__ 1 -# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define __BIG_ENDIAN__ 1 -# else -# error "unknown host byte order" -# endif -#endif - -namespace mold { - -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; - -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; - -template -static inline T bswap(T val) { - switch (sizeof(T)) { - case 2: return __builtin_bswap16(val); - case 4: return __builtin_bswap32(val); - case 8: return __builtin_bswap64(val); - default: __builtin_unreachable(); - } -} - -template -class LittleEndian { -public: - LittleEndian() = default; - LittleEndian(T x) { *this = x; } - - operator T() const { - if constexpr (sizeof(T) == SIZE) { - T x; - memcpy(&x, val, sizeof(T)); - if constexpr (std::endian::native == std::endian::big) - x = bswap(x); - return x; - } else { - static_assert(SIZE == 3); - return (val[2] << 16) | (val[1] << 8) | val[0]; - } - } - - LittleEndian &operator=(T x) { - if constexpr (sizeof(T) == SIZE) { - if constexpr (std::endian::native == std::endian::big) - x = bswap(x); - memcpy(val, &x, sizeof(T)); - } else { - static_assert(SIZE == 3); - val[2] = x >> 16; - val[1] = x >> 8; - val[0] = x; - } - return *this; - } - - LittleEndian &operator++() { - return *this = *this + 1; - } - - LittleEndian operator++(int) { - T ret = *this; - *this = *this + 1; - 
return ret; - } - - LittleEndian &operator--() { - return *this = *this - 1; - } - - LittleEndian operator--(int) { - T ret = *this; - *this = *this - 1; - return ret; - } - - LittleEndian &operator+=(T x) { - return *this = *this + x; - } - - LittleEndian &operator-=(T x) { - return *this = *this - x; - } - - LittleEndian &operator&=(T x) { - return *this = *this & x; - } - - LittleEndian &operator|=(T x) { - return *this = *this | x; - } - -private: - u8 val[SIZE]; -}; - -using il16 = LittleEndian; -using il32 = LittleEndian; -using il64 = LittleEndian; -using ul16 = LittleEndian; -using ul24 = LittleEndian; -using ul32 = LittleEndian; -using ul64 = LittleEndian; - -template -class BigEndian { -public: - BigEndian() = default; - BigEndian(T x) { *this = x; } - - operator T() const { - if constexpr (sizeof(T) == SIZE) { - T x; - memcpy(&x, val, sizeof(T)); - if constexpr (std::endian::native == std::endian::little) - x = bswap(x); - return x; - } else { - static_assert(SIZE == 3); - return (val[0] << 16) | (val[1] << 8) | val[2]; - } - } - - BigEndian &operator=(T x) { - if constexpr (sizeof(T) == SIZE) { - if constexpr (std::endian::native == std::endian::little) - x = bswap(x); - memcpy(val, &x, sizeof(T)); - } else { - static_assert(SIZE == 3); - val[0] = x >> 16; - val[1] = x >> 8; - val[2] = x; - } - return *this; - } - - BigEndian &operator++() { - return *this = *this + 1; - } - - BigEndian operator++(int) { - T ret = *this; - *this = *this + 1; - return ret; - } - - BigEndian &operator--() { - return *this = *this - 1; - } - - BigEndian operator--(int) { - T ret = *this; - *this = *this - 1; - return ret; - } - - BigEndian &operator+=(T x) { - return *this = *this + x; - } - - BigEndian &operator-=(T x) { - return *this = *this - x; - } - - BigEndian &operator&=(T x) { - return *this = *this & x; - } - - BigEndian &operator|=(T x) { - return *this = *this | x; - } - -private: - u8 val[SIZE]; -}; - -using ib16 = BigEndian; -using ib32 = BigEndian; -using 
ib64 = BigEndian; -using ub16 = BigEndian; -using ub24 = BigEndian; -using ub32 = BigEndian; -using ub64 = BigEndian; - -} // namespace mold diff --git a/common/jobs-unix.cc b/common/jobs-unix.cc deleted file mode 100644 index c101388e..00000000 --- a/common/jobs-unix.cc +++ /dev/null @@ -1,156 +0,0 @@ -// Many build systems attempt to invoke as many linker processes as there -// are cores, based on the assumption that the linker is single-threaded. -// However, since mold is multi-threaded, such build systems' behavior is -// not beneficial and just increases the overall peak memory usage. -// On machines with limited memory, this could lead to an out-of-memory -// error. -// -// This file implements a feature that limits the number of concurrent -// mold processes to just 1 for each user. It is intended to be used as -// `MOLD_JOBS=1 ninja` or `MOLD_JOBS=1 make -j$(nproc)`. -// -// We can't use POSIX semaphores because the counter will not be -// decremented automatically when a process exits abnormally. That would -// results in a deadlock. Therefore, we use lockf-based regional file -// locking instead. Unlike POSIX semaphores, the lock will automatically -// released on process termination. -// -// To wake processes that may be waiting on the lock file, we use a -// pthread condition variable. On normal exit, mold sends notifications to -// all waiting processes. In case of abnormal exit, we use -// pthread_cond_timedwait so that waiters will not wait forever. 
- -#include "common.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace mold { - -static constexpr i64 MAX_JOBS = 128; - -struct SharedData { - std::atomic_bool initialized; - pthread_mutex_t mu; - pthread_cond_t cond; -}; - -static int num_jobs = -1; -static int lock_fd = -1; -static SharedData *shared_data = nullptr; - -static i64 get_mold_jobs() { - char *env = getenv("MOLD_JOBS"); - if (!env) - return 0; - - i64 jobs = std::stol(env); - if (jobs < 0) - return 0; - return std::min(jobs, MAX_JOBS); -} - -static bool do_lock() { - for (i64 i = 0; i < num_jobs; i++) { - lseek(lock_fd, i, SEEK_SET); - if (lockf(lock_fd, F_TLOCK, 1) == 0) - return true; - } - return false; -} - -static SharedData *get_shared_data() { - // Create a shared memory object and mmap it - std::string name = "/mold-signal-" + std::to_string(getuid()); - i64 size = sizeof(SharedData); - - int shm_fd = shm_open(name.c_str(), O_CREAT | O_RDWR, 0600); - if (shm_fd == -1) { - perror("shm_open"); - exit(1); - } - - if (ftruncate(shm_fd, size) == -1) { - perror("ftruncate"); - exit(1); - } - - SharedData *data = (SharedData *)mmap(0, size, PROT_READ | PROT_WRITE, - MAP_SHARED, shm_fd, 0); - close(shm_fd); - - if (data->initialized.exchange(true) == false) { - pthread_mutexattr_t mu_attr; - pthread_mutexattr_init(&mu_attr); - pthread_mutexattr_setpshared(&mu_attr, PTHREAD_PROCESS_SHARED); - -#ifndef __APPLE__ - pthread_mutexattr_setrobust(&mu_attr, PTHREAD_MUTEX_ROBUST); -#endif - - pthread_mutex_init(&data->mu, &mu_attr); - - pthread_condattr_t cond_attr; - pthread_condattr_init(&cond_attr); - pthread_condattr_setpshared(&cond_attr, PTHREAD_PROCESS_SHARED); - pthread_cond_init(&data->cond, &cond_attr); - } - return data; -} - -void acquire_global_lock() { - num_jobs = get_mold_jobs(); - if (num_jobs == 0) - return; - - shared_data = get_shared_data(); - - std::string path; - if (char *dir = getenv("XDG_RUNTIME_DIR")) - path = dir + 
"/mold.lock"s; - else - path = "/tmp/mold-" + std::to_string(getuid()) + ".lock"; - - lock_fd = open(path.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0600); - if (lock_fd == -1 || do_lock()) - return; - - pthread_mutex_t *mu = &shared_data->mu; - pthread_cond_t *cond = &shared_data->cond; - int r = pthread_mutex_lock(mu); - -#ifndef __APPLE__ - // If the previous process got killed while holding the mutex, the - // mutex has became inconsistent. We need to fix it in that case. - if (r == EOWNERDEAD) - pthread_mutex_consistent(mu); -#endif - - for (;;) { - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - ts.tv_sec += 1; - - int r = pthread_cond_timedwait(cond, mu, &ts); - if (do_lock() || r != ETIMEDOUT) - break; - } - - pthread_mutex_unlock(mu); -} - -void release_global_lock() { - if (lock_fd == -1) - return; - close(lock_fd); - pthread_cond_broadcast(&shared_data->cond); -} - -} // namespace mold diff --git a/common/output-file-unix.h b/common/output-file-unix.h deleted file mode 100644 index e09a1867..00000000 --- a/common/output-file-unix.h +++ /dev/null @@ -1,149 +0,0 @@ -#include "common.h" - -#include -#include -#include -#include -#include - -namespace mold { - -inline u32 get_umask() { - u32 orig_umask = umask(0); - umask(orig_umask); - return orig_umask; -} - -template -static std::pair -open_or_create_file(Context &ctx, std::string path, i64 filesize, i64 perm) { - std::string tmpl = filepath(path).parent_path() / ".mold-XXXXXX"; - char *path2 = (char *)save_string(ctx, tmpl).data(); - - i64 fd = mkstemp(path2); - if (fd == -1) - Fatal(ctx) << "cannot open " << path2 << ": " << errno_string(); - - // Reuse an existing file if exists and writable because on Linux, - // writing to an existing file is much faster than creating a fresh - // file and writing to it. 
- if (ctx.overwrite_output_file && rename(path.c_str(), path2) == 0) { - ::close(fd); - fd = ::open(path2, O_RDWR | O_CREAT, perm); - if (fd != -1 && !ftruncate(fd, filesize) && !fchmod(fd, perm & ~get_umask())) - return {fd, path2}; - - unlink(path2); - fd = ::open(path2, O_RDWR | O_CREAT, perm); - if (fd == -1) - Fatal(ctx) << "cannot open " << path2 << ": " << errno_string(); - } - - if (fchmod(fd, (perm & ~get_umask())) == -1) - Fatal(ctx) << "fchmod failed: " << errno_string(); - -#ifdef __linux__ - if (fallocate(fd, 0, 0, filesize) == 0) - return {fd, path2}; -#endif - - if (ftruncate(fd, filesize) == -1) - Fatal(ctx) << "ftruncate failed: " << errno_string(); - return {fd, path2}; -} - -template -class MemoryMappedOutputFile : public OutputFile { -public: - MemoryMappedOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm) - : OutputFile(path, filesize, true) { - std::tie(this->fd, output_tmpfile) = - open_or_create_file(ctx, path, filesize, perm); - - this->buf = (u8 *)mmap(nullptr, filesize, PROT_READ | PROT_WRITE, - MAP_SHARED, this->fd, 0); - if (this->buf == MAP_FAILED) - Fatal(ctx) << path << ": mmap failed: " << errno_string(); - - mold::output_buffer_start = this->buf; - mold::output_buffer_end = this->buf + filesize; - } - - ~MemoryMappedOutputFile() { - if (fd2 != -1) - ::close(fd2); - } - - void close(Context &ctx) override { - Timer t(ctx, "close_file"); - - if (!this->is_unmapped) - munmap(this->buf, this->filesize); - - if (this->buf2.empty()) { - ::close(this->fd); - } else { - FILE *out = fdopen(this->fd, "w"); - fseek(out, 0, SEEK_END); - fwrite(&this->buf2[0], this->buf2.size(), 1, out); - fclose(out); - } - - // If an output file already exists, open a file and then remove it. - // This is the fastest way to unlink a file, as it does not make the - // system to immediately release disk blocks occupied by the file. 
- fd2 = ::open(this->path.c_str(), O_RDONLY); - if (fd2 != -1) - unlink(this->path.c_str()); - - if (rename(output_tmpfile, this->path.c_str()) == -1) - Fatal(ctx) << this->path << ": rename failed: " << errno_string(); - output_tmpfile = nullptr; - } - -private: - int fd2 = -1; -}; - -template -std::unique_ptr> -OutputFile::open(Context &ctx, std::string path, i64 filesize, i64 perm) { - Timer t(ctx, "open_file"); - - if (path.starts_with('/') && !ctx.arg.chroot.empty()) - path = ctx.arg.chroot + "/" + path_clean(path); - - bool is_special = false; - if (path == "-") { - is_special = true; - } else { - struct stat st; - if (stat(path.c_str(), &st) == 0 && (st.st_mode & S_IFMT) != S_IFREG) - is_special = true; - } - - OutputFile *file; - if (is_special) - file = new MallocOutputFile(ctx, path, filesize, perm); - else - file = new MemoryMappedOutputFile(ctx, path, filesize, perm); - -#ifdef MADV_HUGEPAGE - // Enable transparent huge page for an output memory-mapped file. - // On Linux, it has an effect only on tmpfs mounted with `huge=advise`, - // but it can make the linker ~10% faster. You can try it by creating - // a tmpfs with the following commands - // - // $ mkdir tmp - // $ sudo mount -t tmpfs -o size=2G,huge=advise none tmp - // - // and then specifying a path under the directory as an output file. 
- madvise(file->buf, filesize, MADV_HUGEPAGE); -#endif - - if (ctx.arg.filler != -1) - memset(file->buf, ctx.arg.filler, filesize); - return std::unique_ptr(file); -} - -} // namespace mold diff --git a/common/output-file-win32.h b/common/output-file-win32.h deleted file mode 100644 index 5fc92496..00000000 --- a/common/output-file-win32.h +++ /dev/null @@ -1,103 +0,0 @@ -#include "common.h" - -#include -#include -#include - -namespace mold { - -template -class MemoryMappedOutputFile : public OutputFile { -public: - MemoryMappedOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm) - : OutputFile(path, filesize, true) { - // TODO: use intermediate temporary file for output. - DWORD file_attrs = - (perm & 0200) ? FILE_ATTRIBUTE_NORMAL : FILE_ATTRIBUTE_READONLY; - file_handle = - CreateFileA(path.c_str(), GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, - nullptr, CREATE_ALWAYS, file_attrs, nullptr); - if (file_handle == INVALID_HANDLE_VALUE) - Fatal(ctx) << "cannot open " << path << ": " << GetLastError(); - - HANDLE mapping_handle = CreateFileMapping( - file_handle, nullptr, PAGE_READWRITE, 0, filesize, nullptr); - if (!mapping_handle) - Fatal(ctx) << path << ": CreateFileMapping failed: " << GetLastError(); - - this->buf = - (u8 *)MapViewOfFile(mapping_handle, FILE_MAP_WRITE, 0, 0, filesize); - CloseHandle(mapping_handle); - if (!this->buf) - Fatal(ctx) << path << ": MapViewOfFile failed: " << GetLastError(); - - mold::output_buffer_start = this->buf; - mold::output_buffer_end = this->buf + filesize; - } - - ~MemoryMappedOutputFile() { - if (file_handle != INVALID_HANDLE_VALUE) - CloseHandle(file_handle); - } - - void close(Context &ctx) override { - Timer t(ctx, "close_file"); - - UnmapViewOfFile(this->buf); - - if (!this->buf2.empty()) { - if (SetFilePointer(file_handle, 0, nullptr, FILE_END) == - INVALID_SET_FILE_POINTER) - Fatal(ctx) << this->path - << ": SetFilePointer failed: " << GetLastError(); - - DWORD 
written; - if (!WriteFile(file_handle, this->buf2.data(), this->buf2.size(), - &written, nullptr)) - Fatal(ctx) << this->path << ": WriteFile failed: " << GetLastError(); - } - - CloseHandle(file_handle); - file_handle = INVALID_HANDLE_VALUE; - } - -private: - HANDLE file_handle; -}; - -template -std::unique_ptr> -OutputFile::open(Context &ctx, std::string path, i64 filesize, i64 perm) { - Timer t(ctx, "open_file"); - - if (path.starts_with('/') && !ctx.arg.chroot.empty()) - path = ctx.arg.chroot + "/" + path_clean(path); - - bool is_special = false; - if (path == "-") { - is_special = true; - } else { - HANDLE file_handle = - CreateFileA(path.c_str(), GENERIC_READ, - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, - nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); - if (file_handle != INVALID_HANDLE_VALUE) { - if (GetFileType(file_handle) != FILE_TYPE_DISK) - is_special = true; - CloseHandle(file_handle); - } - } - - OutputFile *file; - if (is_special) - file = new MallocOutputFile(ctx, path, filesize, perm); - else - file = new MemoryMappedOutputFile(ctx, path, filesize, perm); - - if (ctx.arg.filler != -1) - memset(file->buf, ctx.arg.filler, filesize); - return std::unique_ptr>(file); -} - -} // namespace mold diff --git a/common/output-file.h b/common/output-file.h deleted file mode 100644 index 63299ed9..00000000 --- a/common/output-file.h +++ /dev/null @@ -1,5 +0,0 @@ -#if _WIN32 -# include "output-file-win32.h" -#else -# include "output-file-unix.h" -#endif diff --git a/debian/changelog b/debian/changelog index cedfce47..7e7ae693 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,13 @@ -mold (2.32.1+dfsg-3) UNRELEASED; urgency=medium +mold (2.34.0+dfsg-1) unstable; urgency=medium - * Add --encoded-package-metadata option (pulled from upstream, #1308). 
+ * New upstream release + * Fix two missing-license-paragraph-in-dep5-copyright warnings + + [ Matthias Klose ] * When no package-metadata option is given, fall-back to the envvar ELF_PACKAGE_METADATA. - -- Matthias Klose Tue, 06 Aug 2024 13:29:29 +0200 + -- Sylvestre Ledru Wed, 25 Sep 2024 12:30:51 +0200 mold (2.32.1+dfsg-2) unstable; urgency=medium diff --git a/debian/copyright b/debian/copyright index 54ec87d2..3c0b7c1e 100644 --- a/debian/copyright +++ b/debian/copyright @@ -9,7 +9,7 @@ Files-Excluded: third-party/mimalloc/bin/mimalloc-redirect.dll third-party/zlib/contrib/dotzlib Files: * -Copyright: 2020-2021 Rui Ueyama +Copyright: 2020-2024 Rui Ueyama License: MIT Files: third-party/tbb/* @@ -111,3 +111,33 @@ License: MIT LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +License: BSD-3-Clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + . + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + . + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + . + 3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +License: GPL-2+ + On Debian systems, the full text of the GNU General Public License + version 2 can be found in the file '/usr/share/common-licenses/GPL-2'. diff --git a/debian/patches/encoded-package-metadata.diff b/debian/patches/encoded-package-metadata.diff deleted file mode 100644 index 7a7a8ed6..00000000 --- a/debian/patches/encoded-package-metadata.diff +++ /dev/null @@ -1,79 +0,0 @@ ---- a/elf/cmdline.cc -+++ b/elf/cmdline.cc -@@ -119,6 +119,8 @@ Options: - --oformat=binary Omit ELF, section, and program headers - --pack-dyn-relocs=[relr,none] - Pack dynamic relocations -+ --encoded-package-metadata=PERCENT_ENCODED_STRING -+ Set a given string to .note.package - --package-metadata=STRING Set a given string to .note.package - --perf Print performance statistics - --pie, --pic-executable Create a position-independent executable -@@ -406,6 +408,49 @@ split_by_comma_or_colon(std::string_view - return vec; - } - -+/* Decode a hexadecimal character. Return -1 on error. 
*/ -+static int hexdecode(char c) { -+ if ('0' <= c && c <= '9') -+ return c - '0'; -+ if ('A' <= c && c <= 'F') -+ return c - 'A' + 10; -+ if ('a' <= c && c <= 'f') -+ return c - 'a' + 10; -+ return -1; -+} -+ -+template -+static std::string parse_percent_encoded_string(Context &ctx, std::string opt, std::string_view arg) { -+ std::string decoded; -+ int step = 1; -+ for (i64 i = 0; i < arg.size(); i += step) { -+ step = 1; -+ if (arg[i] != '%') { -+ decoded += arg[i]; -+ continue; -+ } -+ if (i + 1 > arg.size()) { -+ Fatal(ctx) << "option --" << opt << ": invalid percent-encoded string: " << arg; -+ } -+ step++; -+ if (arg[i+1] == '%') { -+ decoded += '%'; -+ continue; -+ } -+ if (i + 2 > arg.size()) { -+ Fatal(ctx) << "option --" << opt << ": invalid percent-encoded string: " << arg; -+ } -+ step++; -+ int hex1 = hexdecode(arg[i+1]); -+ int hex2 = hexdecode(arg[i+2]); -+ if (hex1 == -1 || hex2 == -1) { -+ Fatal(ctx) << "option --" << opt << ": invalid percent-encoded string: " << arg; -+ } -+ decoded += (char) ((hex1 << 4) + hex2); -+ } -+ return decoded; -+} -+ - template - static void read_retain_symbols_file(Context &ctx, std::string_view path) { - MappedFile *mf = must_open_file(ctx, std::string(path)); -@@ -863,6 +908,8 @@ std::vector parse_nonpositi - } else if (read_flag("pack-dyn-relocs=none") || - read_z_flag("nopack-relative-relocs")) { - ctx.arg.pack_dyn_relocs_relr = false; -+ } else if (read_arg("encoded-package-metadata")) { -+ ctx.arg.package_metadata = parse_percent_encoded_string(ctx, "encoded-package-metadata", arg); - } else if (read_arg("package-metadata")) { - ctx.arg.package_metadata = arg; - } else if (read_flag("stats")) { ---- a/test/elf/package-metadata.sh -+++ b/test/elf/package-metadata.sh -@@ -10,3 +10,6 @@ EOF - - $CC -B. -o $t/exe $t/a.o -Wl,-package-metadata='{"foo":"bar"}' - readelf -x .note.package $t/exe | grep -Fq '{"foo":"bar"}' -+ -+$CC -B. 
-o $t/exe2 $t/a.o -Wl,--encoded-package-metadata=%7B%22foo%22%3A%22bar%22%7D -+readelf -x .note.package $t/exe2 | grep -Fq '{"foo":"bar"}' diff --git a/debian/patches/env-package-metadata.diff b/debian/patches/env-package-metadata.diff index d88f614a..d1b1b590 100644 --- a/debian/patches/env-package-metadata.diff +++ b/debian/patches/env-package-metadata.diff @@ -1,6 +1,8 @@ ---- a/elf/cmdline.cc -+++ b/elf/cmdline.cc -@@ -1480,6 +1480,14 @@ std::vector parse_nonpositi +Index: mold/src/cmdline.cc +=================================================================== +--- mold.orig/src/cmdline.cc ++++ mold/src/cmdline.cc +@@ -1506,6 +1506,14 @@ std::vector parse_nonpositi ctx.arg.dependency_file = ctx.arg.chroot + "/" + ctx.arg.dependency_file; } diff --git a/debian/patches/fix-armhf-build.diff b/debian/patches/fix-armhf-build.diff deleted file mode 100644 index 5462028d..00000000 --- a/debian/patches/fix-armhf-build.diff +++ /dev/null @@ -1,24 +0,0 @@ -From baf9ae9038dba56324e08e5df0023225a6067154 Mon Sep 17 00:00:00 2001 -From: Rui Ueyama -Date: Tue, 16 Jul 2024 11:59:22 +0900 -Subject: [PATCH] Fix a test on Debian - -If the default linker doesn't complain, just skip the test. - -Fixes https://github.com/rui314/mold/issues/1301 ---- - test/elf/arm_abs-error.sh | 2 ++ - 1 file changed, 2 insertions(+) - -Index: mold/test/elf/arm_abs-error.sh -=================================================================== ---- mold.orig/test/elf/arm_abs-error.sh -+++ mold/test/elf/arm_abs-error.sh -@@ -12,5 +12,7 @@ extern char foo; - int main() { printf("foo=%p\n", &foo); } - EOF - -+$CC -o $t/exe -pie $t/a.o $t/b.o >& /dev/null && skip -+ - ! $CC -B. 
-o $t/exe -pie $t/a.o $t/b.o >& $t/log - grep -q 'recompile with -fPIC' $t/log diff --git a/debian/patches/series b/debian/patches/series index c5049b32..3b68d71e 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1 @@ -fix-armhf-build.diff -encoded-package-metadata.diff env-package-metadata.diff diff --git a/dist.sh b/dist.sh index 6d2d698a..1137feaa 100755 --- a/dist.sh +++ b/dist.sh @@ -162,6 +162,8 @@ mkdir /build cd /build cmake -DCMAKE_BUILD_TYPE=Release -DMOLD_MOSTLY_STATIC=On /mold cmake --build . -j\$(nproc) +mv mold mold2 +./mold2 -run cmake --build . -j\$(nproc) ctest -j\$(nproc) cmake --install . --prefix $dest --strip find $dest -print | xargs touch --no-dereference --date='$timestamp' diff --git a/docs/design.md b/docs/design.md index 62ea14e6..6bcf7004 100644 --- a/docs/design.md +++ b/docs/design.md @@ -1,3 +1,8 @@ +[This document was written in 2020, and the contents are outdated. +Specifically, we no longer believe that object preloading is a good +idea. That being said, most of the points in this document still hold +even today. Therefore, I'll keep this document as-is.] + ## Design and implementation of mold For the rest of this documentation, I'll explain the design and the diff --git a/docs/mold.1 b/docs/mold.1 index 283a89a4..f0d9d1f7 100644 --- a/docs/mold.1 +++ b/docs/mold.1 @@ -1,6 +1,6 @@ .\" generated with Ronn-NG/v0.9.1 .\" http://github.com/apjanke/ronn-ng/tree/0.9.1 -.TH "MOLD" "1" "May 2024" "" +.TH "MOLD" "1" "August 2024" "" .SH "NAME" \fBmold\fR \- a modern linker .SH "SYNOPSIS" @@ -67,6 +67,9 @@ Synonym for \fB\-\-color\-diagnostics=auto\fR\. \fB\-\-no\-color\-diagnostics\fR Synonym for \fB\-\-color\-diagnostics=never\fR\. .TP +\fB\-\-detach\fR, \fB\-\-no\-detach\fR +Permit or do not permit mold to create a debug info file in the background\. +.TP \fB\-\-fork\fR, \fB\-\-no\-fork\fR Spawn a child process and let it do the actual linking\.
When linking a large program, the OS kernel can take a few hundred milliseconds to terminate a \fBmold\fR process\. \fB\-\-fork\fR hides that latency\. By default, it does fork\. .TP @@ -94,7 +97,16 @@ This option is useful for finding bugs that depend on the initialization order o By reversing the order of input sections using \fB\-\-reverse\-sections\fR, you can easily test that your program works in the reversed initialization order\. .TP \fB\-\-run\fR \fIcommand\fR \fIarg\fR\|\.\|\.\|\. -Run \fIcommand\fR with \fBmold\fR \fB/usr/bin/ld\fR\. Specifically, \fBmold\fR runs a given command with the \fBLD_PRELOAD\fR environment set to intercept exec(3) family functions and replaces \fBargv[0]\fR with itself if it is \fBld\fR, \fBld\.gold\fR, or \fBld\.lld\fR\. +Run \fIcommand\fR with \fBmold\fR as \fB/usr/bin/ld\fR\. Specifically, \fBmold\fR runs a given command with the \fBLD_PRELOAD\fR environment set to intercept exec(3) family functions and replaces \fBargv[0]\fR with itself if it is \fBld\fR, \fBld\.gold\fR, or \fBld\.lld\fR\. +.TP +\fB\-\-separate\-debug\-file\fR, \fB\-\-separate\-debug\-file\fR=\fIfile\fR +Bundle debug info sections into a separate file instead of embedding them in an output executable or a shared library\. mold creates a debug info file in the background by default, so that you can start running your executable as soon as possible\. +.IP +By default, the debug info file is created in the same directory as is the output file, with the \fB\.dbg\fR file extension\. That filename is embedded into the output file so that \fBgdb\fR can automatically find the debug info file for the output file\. For more info about gdb features related to separate debug files, see \fIhttps://sourceware\.org/gdb/current/onlinedocs/gdb\.html/Separate\-Debug\-Files\.html\fR\. +.IP +mold holds a file lock with flock(2) while creating a debug info file in the background\. 
+.IP +If you don't want to create a debug info file in the background, pass the \fB\-\-no\-detach\fR option\. .TP \fB\-\-shuffle\-sections\fR, \fB\-\-shuffle\-sections\fR=\fInumber\fR Randomize the output by shuffling the order of input sections before assigning them the offsets in the output file\. If a \fInumber\fR is given, it's used as a seed for the random number generator, so that the linker produces the same output for the same seed\. If no seed is given, a random number is used as a seed\. @@ -119,6 +131,17 @@ Use multiple threads\. By default, \fBmold\fR uses as many threads as the number .TP \fB\-\-quick\-exit\fR, \fB\-\-no\-quick\-exit\fR Use or do not use \fBquick_exit\fR to exit\. +.TP +\fB\-z rewrite\-endbr\fR, \fB\-z norewrite\-endbr\fR +As a security measure, some CPU instruction sets have recently gained a feature to protect control flow integrity by disallowing indirect branches by default\. If the feature is enabled, the instruction that is executed immediately after an indirect branch must be a branch target marker instruction, or a CPU\-level fault will be raised\. The marker instruction is also known as a "landing pad" instruction, to which indirect branches can land\. This feature makes ROP attacks harder to conduct\. +.IP +To use the feature, a function whose pointer is taken needs to begin with a landing pad because a function call via a function pointer is compiled to an indirect branch\. On the other hand, if a function is called only directly (i\.e\. referred to only by \fIdirect\fR branch instructions), it doesn't have to begin with it\. +.IP +By default, the compiler always emits a landing pad at the beginning of each global function because it doesn't know whether or not the function's pointer is taken in another translation unit\. As a result, the resulting binary has more attack surface than necessary\.
+.IP +If \fB\-z rewrite\-endbr\fR is given, mold conducts a whole program analysis to identify functions whose addresses are actually taken and rewrites landing pads with no\-ops for non\-address\-taken functions, reducing the attack surface\. +.IP +This feature is currently available only on x86\-64\. .SH "GNU\-COMPATIBLE OPTIONS" .TP \fB\-\-help\fR @@ -227,13 +250,20 @@ Alias for \fB\-\-section\-start=\.text=\fR\fIaddress\fR\. \fB\-\-allow\-multiple\-definition\fR Normally, the linker reports an error if there are more than one definition of a symbol\. This option changes the default behavior so that it doesn't report an error for duplicate definitions and instead use the first definition\. .TP +\fB\-\-allow\-shlib\-undefined\fR, \fB\-\-no\-allow\-shlib\-undefined\fR +Even if mold succeeds in linking a main executable without undefined symbol errors, you may still encounter symbol lookup errors at runtime because the dynamic linker cannot find some symbols in shared libraries in any ELF module\. This occurs because mold ignores undefined symbols in shared libraries by default\. +.IP +If you pass \fB\-\-no\-allow\-shlib\-undefined\fR, mold verifies that undefined symbols in shared libraries given to the linker can be resolved at link\-time\. In other words, this converts the runtime error to a link\-time error\. +.IP +Note that you need to pass all shared libraries, including indirectly dependent ones, to the linker as arguments for \fB\-l\fR\. If a shared library depends on a library that's not passed to the linker, the verification will be skipped for that file\. +.TP \fB\-\-as\-needed\fR, \fB\-\-no\-as\-needed\fR By default, shared libraries given to the linker are unconditionally added to the list of required libraries in an output file\. However, shared libraries after \fB\-\-as\-needed\fR are added to the list only when at least one symbol is actually used by the output file\.
In other words, shared libraries after \fB\-\-as\-needed\fR are not added to the list of needed libraries if they are not needed by a program\. .IP The \fB\-\-no\-as\-needed\fR option restores the default behavior for subsequent files\. .TP -\fB\-\-build\-id\fR=[ \fBmd5\fR | \fBsha1\fR | \fBsha256\fR | \fBuuid\fR | \fB0x\fR\fIhexstring\fR | \fBnone\fR ] -Create a \fB\.note\.gnu\.build\-id\fR section containing a byte string to uniquely identify an output file\. \fBsha256\fR compute a 256\-bit cryptographic hash of an output file and set it to build\-id\. \fBmd5\fR and \fBsha1\fR compute the same hash but truncate it to 128 and 160 bits, respectively, before setting it to build\-id\. \fBuuid\fR sets a random 128\-bit UUID\. \fB0x\fR\fIhexstring\fR sets \fIhexstring\fR\. +\fB\-\-build\-id\fR=[ \fBmd5\fR | \fBsha1\fR | \fBsha256\fR | \fBfast\fR | \fBuuid\fR | \fB0x\fR\fIhexstring\fR | \fBnone\fR ] +Create a \fB\.note\.gnu\.build\-id\fR section containing a byte string to uniquely identify an output file\. \fBsha256\fR compute a 256\-bit cryptographic hash of an output file and set it to build\-id\. \fBmd5\fR and \fBsha1\fR compute the same hash but truncate it to 128 and 160 bits, respectively, before setting it to build\-id\. \fBuuid\fR sets a random 128\-bit UUID\. \fB0x\fR\fIhexstring\fR sets \fIhexstring\fR\. \fBfast\fR is a synonym for \fBsha256\fR\. .TP \fB\-\-build\-id\fR Synonym for \fB\-\-build\-id=sha256\fR\. @@ -272,10 +302,8 @@ The \fB\-\-emit\-relocs\fR instructs the linker to leave relocation sections in \fB\-\-enable\-new\-dtags\fR, \fB\-\-disable\-new\-dtags\fR By default, \fBmold\fR emits \fBDT_RUNPATH\fR for \fB\-\-rpath\fR\. If you pass \fB\-\-disable\-new\-dtags\fR, \fBmold\fR emits \fBDT_RPATH\fR for \fB\-\-rpath\fR instead\. .TP -\fB\-\-execute\-only\fR -Traditionally, most processors require both executable and readable bits to 1 to make the page executable, which allows machine code to be read as data at runtime\. 
This is actually what an attacker often does after gaining a limited control of a process to find pieces of machine code they can use to gain the full control of the process\. As a mitigation, some recent processors allows "execute\-only" pages\. If a page is execute\-only, you can call a function there as long as you know its address but can't read it as data\. -.IP -This option marks text segments execute\-only\. This option currently works only on some ARM64 processors\. +\fB\-\-execute\-only\fR: + .TP \fB\-\-exclude\-libs\fR=\fIlibraries\fR \|\.\|\.\|\. Mark all symbols in the given \fIlibraries\fR hidden\. @@ -332,9 +360,6 @@ If \fBrelr\fR is specified, all \fBR_*_RELATIVE\fR relocations are put into \fB\ .IP Note that a runtime loader has to support \fB\.relr\.dyn\fR to run executables or shared libraries linked with \fB\-\-pack\-dyn\-relocs=relr\fR\. As of 2022, only ChromeOS, Android and Fuchsia support it\. .TP -\fB\-\-package\-metadata\fR=\fIstring\fR -Embed \fIstring\fR to a \fB\.note\.package\fR section\. This option is intended to be used by a package management command such as rpm(8) to embed metadata regarding a package to each executable file\. -.TP \fB\-\-pie\fR, \fB\-\-pic\-executable\fR, \fB\-\-no\-pie\fR, \fB\-\-no\-pic\-executable\fR Create a position\-independent executable\. .TP @@ -499,7 +524,7 @@ Mark DSO to be initialized first at runtime\. \fB\-z interpose\fR Mark object to interpose all DSOs but executable\. 
.TP -\fB\-(\fR, \fB\-)\fR, \fB\-EL\fR, \fB\-O\fR\fInumber\fR, \fB\-\-allow\-shlib\-undefined\fR, \fB\-\-dc\fR, \fB\-\-dp\fR, \fB\-\-end\-group\fR, \fB\-\-no\-add\-needed\fR, \fB\-\-no\-allow\-shlib\-undefined\fR, \fB\-\-no\-copy\-dt\-needed\-entries\fR, \fB\-\-nostdlib\fR, \fB\-\-rpath\-link=Ar dir\fR, \fB\-\-sort\-common\fR, \fB\-\-sort\-section\fR, \fB\-\-start\-group\fR, \fB\-\-warn\-constructors\fR, \fB\-\-warn\-once\fR, \fB\-\-fix\-cortex\-a53\-835769\fR, \fB\-\-fix\-cortex\-a53\-843419\fR, \fB\-z combreloc\fR, \fB\-z common\-page\-size\fR, \fB\-z nocombreloc\fR +\fB\-(\fR, \fB\-)\fR, \fB\-EL\fR, \fB\-O\fR\fInumber\fR, \fB\-\-dc\fR, \fB\-\-dp\fR, \fB\-\-end\-group\fR, \fB\-\-no\-add\-needed\fR, \fB\-\-no\-copy\-dt\-needed\-entries\fR, \fB\-\-nostdlib\fR, \fB\-\-rpath\-link=Ar dir\fR, \fB\-\-sort\-common\fR, \fB\-\-sort\-section\fR, \fB\-\-start\-group\fR, \fB\-\-warn\-constructors\fR, \fB\-\-warn\-once\fR, \fB\-\-fix\-cortex\-a53\-835769\fR, \fB\-\-fix\-cortex\-a53\-843419\fR, \fB\-z combreloc\fR, \fB\-z common\-page\-size\fR, \fB\-z nocombreloc\fR Ignored .SH "ENVIRONMENT VARIABLES" .TP diff --git a/docs/mold.md b/docs/mold.md index 7ba64401..19e7c25b 100644 --- a/docs/mold.md +++ b/docs/mold.md @@ -152,6 +152,9 @@ but as `-o magic`. * `--no-color-diagnostics`: Synonym for `--color-diagnostics=never`. +* `--detach`, `--no-detach`: + Permit or do not permit mold to create a debug info file in the background. + * `--fork`, `--no-fork`: Spawn a child process and let it do the actual linking. When linking a large program, the OS kernel can take a few hundred milliseconds to terminate a @@ -198,10 +201,29 @@ but as `-o magic`. easily test that your program works in the reversed initialization order. * `--run` _command_ _arg_...: - Run _command_ with `mold` `/usr/bin/ld`. Specifically, `mold` runs a given - command with the `LD_PRELOAD` environment set to intercept exec(3) family - functions and replaces `argv[0]` with itself if it is `ld`, `ld.gold`, or - `ld.lld`.
+ Run _command_ with `mold` as `/usr/bin/ld`. Specifically, `mold` runs a + given command with the `LD_PRELOAD` environment set to intercept exec(3) + family functions and replaces `argv[0]` with itself if it is `ld`, + `ld.gold`, or `ld.lld`. + +* `--separate-debug-file`, `--separate-debug-file`=_file_: + Bundle debug info sections into a separate file instead of embedding them in + an output executable or a shared library. mold creates a debug info file in + the background by default, so that you can start running your executable as + soon as possible. + + By default, the debug info file is created in the same directory as is the + output file, with the `.dbg` file extension. That filename is embedded into + the output file so that `gdb` can automatically find the debug info file for + the output file. For more info about gdb features related to separate debug + files, see + <https://sourceware.org/gdb/current/onlinedocs/gdb.html/Separate-Debug-Files.html>. + + mold holds a file lock with flock(2) while creating a debug info file in the + background. + + If you don't want to create a debug info file in the background, pass the + `--no-detach` option. * `--shuffle-sections`, `--shuffle-sections`=_number_: Randomize the output by shuffling the order of input sections before @@ -246,6 +268,33 @@ but as `-o magic`. * `--quick-exit`, `--no-quick-exit`: Use or do not use `quick_exit` to exit. +* `-z rewrite-endbr`, `-z norewrite-endbr`: + As a security measure, some CPU instruction sets have recently gained a + feature to protect control flow integrity by disallowing indirect branches + by default. If the feature is enabled, the instruction that is executed + immediately after an indirect branch must be a branch target marker + instruction, or a CPU-level fault will be raised. The marker instruction is also + known as a "landing pad" instruction, to which indirect branches can land. + This feature makes ROP attacks harder to conduct.
+ + To use the feature, a function whose pointer is taken needs to begin with a + landing pad because a function call via a function pointer is compiled to an + indirect branch. On the other hand, if a function is called only directly + (i.e. referred to only by _direct_ branch instructions), it doesn't have to + begin with it. + + By default, the compiler always emits a landing pad at the beginning of each + global function because it doesn't know whether or not the function's + pointer is taken in another translation unit. As a result, the resulting + binary has more attack surface than necessary. + + If `-z rewrite-endbr` is given, mold conducts a whole program analysis + to identify functions whose addresses are actually taken and rewrites + landing pads with no-ops for non-address-taken functions, reducing the + attack surface. + + This feature is currently available only on x86-64. + ## GNU-COMPATIBLE OPTIONS * `--help`: @@ -390,6 +439,23 @@ but as `-o magic`. report an error for duplicate definitions and instead use the first definition. +* `--allow-shlib-undefined`, `--no-allow-shlib-undefined`: + Even if mold succeeds in linking a main executable without undefined symbol + errors, you may still encounter symbol lookup errors at runtime because the + dynamic linker cannot find some symbols in shared libraries in any ELF + module. This occurs because mold ignores undefined symbols in shared + libraries by default. + + If you pass `--no-allow-shlib-undefined`, mold verifies that undefined + symbols in shared libraries given to the linker can be resolved at + link-time. In other words, this converts the runtime error to a link-time + error. + + Note that you need to pass all shared libraries, including indirectly + dependent ones, to the linker as arguments for `-l`. If a shared library + depends on a library that's not passed to the linker, the verification will + be skipped for that file.
+ * `--as-needed`, `--no-as-needed`: By default, shared libraries given to the linker are unconditionally added to the list of required libraries in an output file. However, shared @@ -401,13 +467,13 @@ but as `-o magic`. The `--no-as-needed` option restores the default behavior for subsequent files. -* `--build-id`=[ `md5` | `sha1` | `sha256` | `uuid` | `0x`_hexstring_ | `none` ]: +* `--build-id`=[ `md5` | `sha1` | `sha256` | `fast` | `uuid` | `0x`_hexstring_ | `none` ]: Create a `.note.gnu.build-id` section containing a byte string to uniquely identify an output file. `sha256` compute a 256-bit cryptographic hash of an output file and set it to build-id. `md5` and `sha1` compute the same hash but truncate it to 128 and 160 bits, respectively, before setting it to build-id. `uuid` sets a random 128-bit UUID. `0x`_hexstring_ sets - _hexstring_. + _hexstring_. `fast` is a synonym for `sha256`. * `--build-id`: Synonym for `--build-id=sha256`. @@ -463,17 +529,22 @@ but as `-o magic`. `--disable-new-dtags`, `mold` emits `DT_RPATH` for `--rpath` instead. * `--execute-only`: - Traditionally, most processors require both executable and readable bits to - 1 to make the page executable, which allows machine code to be read as data - at runtime. This is actually what an attacker often does after gaining a - limited control of a process to find pieces of machine code they can use to - gain the full control of the process. As a mitigation, some recent - processors allows "execute-only" pages. If a page is execute-only, you can - call a function there as long as you know its address but can't read it as - data. - - This option marks text segments execute-only. This option currently works - only on some ARM64 processors. + + Traditionally, setting the executable bit to 1 for a memory page implies + that the page also become readable, which allows machine code to be read + as data at runtime. 
That is actually what an attacker often does after + gaining a limited control of a process to find pieces of machine code + they can use to gain the full control of the process. As a mitigation, + recent processors including some ARM64 ones allow "execute-only" pages. + If a page is execute-only, you can call a function there as long as you + know its address but can't read it as data. + + This option marks text segments as execute-only by setting just the "X" + bit instead of "RX". Note that on most systems, the absence of the "R" + bit in the text segment serves just as a hint. If you run a program + linked with `--execute-only` on a processor that doesn't support + execute-only pages, your executable will likely still function normally, + but the text segment will remain readable. * `--exclude-libs`=_libraries_ ...: Mark all symbols in the given _libraries_ hidden. @@ -558,11 +629,6 @@ but as `-o magic`. shared libraries linked with `--pack-dyn-relocs=relr`. As of 2022, only ChromeOS, Android and Fuchsia support it. -* `--package-metadata`=_string_: - Embed _string_ to a `.note.package` section. This option is intended to be - used by a package management command such as rpm(8) to embed metadata - regarding a package to each executable file. - * `--pie`, `--pic-executable`, `--no-pie`, `--no-pic-executable`: Create a position-independent executable. @@ -809,7 +875,7 @@ but as `-o magic`. * `-z interpose`: Mark object to interpose all DSOs but executable.
-* `-(`, `-)`, `-EL`, `-O`_number_, `--allow-shlib-undefined`, `--dc`, `--dp`, `--end-group`, `--no-add-needed`, `--no-allow-shlib-undefined`, `--no-copy-dt-needed-entries`, `--nostdlib`, `--rpath-link=Ar dir`, `--sort-common`, `--sort-section`, `--start-group`, `--warn-constructors`, `--warn-once`, `--fix-cortex-a53-835769`, `--fix-cortex-a53-843419`, `-z combreloc`, `-z common-page-size`, `-z nocombreloc`: +* `-(`, `-)`, `-EL`, `-O`_number_, `--dc`, `--dp`, `--end-group`, `--no-add-needed`, `--no-copy-dt-needed-entries`, `--nostdlib`, `--rpath-link=Ar dir`, `--sort-common`, `--sort-section`, `--start-group`, `--warn-constructors`, `--warn-once`, `--fix-cortex-a53-835769`, `--fix-cortex-a53-843419`, `-z combreloc`, `-z common-page-size`, `-z nocombreloc`: Ignored ## ENVIRONMENT VARIABLES @@ -832,6 +898,8 @@ but as `-o magic`. consider setting this environment variable to `1` to see if it addresses the OOM issue. + Currently, any value other than `1` is silently ignored. + * `MOLD_DEBUG`: If this variable is set to a non-empty string, `mold` embeds its command-line options in the output file's `.comment` section. diff --git a/elf/arch-alpha.cc b/elf/arch-alpha.cc deleted file mode 100644 index d7189434..00000000 --- a/elf/arch-alpha.cc +++ /dev/null @@ -1,330 +0,0 @@ -// Alpha is a 64-bit RISC ISA developed by DEC (Digital Equipment -// Corporation) in the early '90s. It aimed to be an ISA that would last -// 25 years. DEC expected Alpha would become 1000x faster during that time -// span. Since the ISA was developed from scratch for future machines, -// it's 64-bit from the beginning. There's no 32-bit variant. -// -// DEC ported its own Unix (Tru64) to Alpha. Microsoft also ported Windows -// NT to it. But it wasn't a huge commercial success. -// -// DEC was acquired by Compaq in 1997. 
In the late '90s, Intel and -// Hewlett-Packard were advertising that their upcoming Itanium processor -// would achieve significantly better performance than RISC processors, so -// Compaq decided to discontinue the Alpha processor line to switch to -// Itanium. Itanium resulted in a miserable failure, but it still suceeded -// to wipe out several RISC processors just by promising overly optimistic -// perf numbers. Alpha as an ISA would probably have been fine after 25 -// years since its introduction (which is 1992 + 25 = 2017), but the -// company and its market didn't last that long. -// -// From the linker's point of view, there are a few peculiarities in its -// psABI as shown below: -// -// - Alpha lacks PC-relative memory load/store instructions, so it uses -// register-relative load/store instructions in position-independent -// code. Specifically, GP (which is an alias for $r29) is always -// maintained to refer to .got+0x8000, and global variables' addresses -// are loaded in a GP-relative manner. -// -// - It looks like even function addresses are first loaded to register -// in a GP-relative manner before calling it. We can relax it to -// convert the instruction sequence with a direct branch instruction, -// but by default, object files don't use a direct branch to call a -// function. Therefore, by default, we don't need to create a PLT. -// Any function call is made by first reading its address from GOT and -// jump to the address. - -#include "mold.h" - -namespace mold::elf { - -using E = ALPHA; - -// A 32-bit immediate can be materialized in a register with a "load high" -// and a "load low" instruction sequence. The first instruction sets the -// upper 16 bits in a register, and the second one set the lower 16 -// bits. When doing so, they sign-extend an immediate. Therefore, if the -// 15th bit of an immediate happens to be 1, setting a "low half" value -// negates the upper 16 bit values that has already been set in a -// register. 
To compensate that, we need to add 0x8000 when setting the -// upper 16 bits. -static u32 hi(u32 val) { - return bits(val + 0x8000, 31, 16); -} - -template <> -void write_plt_header(Context &ctx, u8 *buf) {} - -template <> -void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) {} - -template <> -void EhFrameSection::apply_eh_reloc(Context &ctx, const ElfRel &rel, - u64 offset, u64 val) { - u8 *loc = ctx.buf + this->shdr.sh_offset + offset; - - switch (rel.r_type) { - case R_NONE: - break; - case R_ALPHA_SREL32: - *(ul32 *)loc = val - this->shdr.sh_addr - offset; - break; - default: - Fatal(ctx) << "unsupported relocation in .eh_frame: " << rel; - } -} - -template <> -void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - u64 S = sym.get_addr(ctx); - u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = sym.get_got_idx(ctx) * sizeof(Word); - u64 GOT = ctx.got->shdr.sh_addr; - u64 GP = ctx.got->shdr.sh_addr + 0x8000; - - switch (rel.r_type) { - case R_ALPHA_REFQUAD: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); - break; - case R_ALPHA_GPREL32: - *(ul32 *)loc = S + A - GP; - break; - case R_ALPHA_LITERAL: - if (A) - *(ul16 *)loc = ctx.extra.got->get_addr(sym, A) - GP; - else - *(ul16 *)loc = GOT + G - GP; - break; - case R_ALPHA_BRSGP: - *(ul32 *)loc |= bits(S + A - P - 4, 22, 0); - break; - case R_ALPHA_GPDISP: - *(ul16 *)loc = hi(GP - P); - *(ul16 *)(loc + A) = GP - P; - break; - case R_ALPHA_SREL32: - *(ul32 *)loc = S + A - P; - break; - case R_ALPHA_GPRELHIGH: - *(ul16 
*)loc = hi(S + A - GP); - break; - case R_ALPHA_GPRELLOW: - *(ul16 *)loc = S + A - GP; - break; - case R_ALPHA_TLSGD: - *(ul16 *)loc = sym.get_tlsgd_addr(ctx) - GP; - break; - case R_ALPHA_TLSLDM: - *(ul16 *)loc = ctx.got->get_tlsld_addr(ctx) - GP; - break; - case R_ALPHA_DTPRELHI: - *(ul16 *)loc = hi(S + A - ctx.dtp_addr); - break; - case R_ALPHA_DTPRELLO: - *(ul16 *)loc = S + A - ctx.dtp_addr; - break; - case R_ALPHA_GOTTPREL: - *(ul16 *)loc = sym.get_gottp_addr(ctx) + A - GP; - break; - case R_ALPHA_TPRELHI: - *(ul16 *)loc = hi(S + A - ctx.tp_addr); - break; - case R_ALPHA_TPRELLO: - *(ul16 *)loc = S + A - ctx.tp_addr; - break; - case R_ALPHA_LITUSE: - case R_ALPHA_HINT: - break; - default: - unreachable(); - } - } -} - -template <> -void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; - - SectionFragment *frag; - i64 frag_addend; - std::tie(frag, frag_addend) = get_fragment(ctx, rel); - - u64 S = frag ? frag->get_addr(ctx) : sym.get_addr(ctx); - u64 A = frag ? 
frag_addend : (i64)rel.r_addend; - - switch (rel.r_type) { - case R_ALPHA_REFLONG: - if (std::optional val = get_tombstone(sym, frag)) - *(ul32 *)loc = *val; - else - *(ul32 *)loc = S + A; - break; - case R_ALPHA_REFQUAD: - if (std::optional val = get_tombstone(sym, frag)) - *(ul64 *)loc = *val; - else - *(ul64 *)loc = S + A; - break; - default: - Fatal(ctx) << *this << ": invalid relocation for non-allocated sections: " - << rel; - } - } -} - -template <> -void InputSection::scan_relocations(Context &ctx) { - assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); - std::span> rels = get_rels(ctx); - - for (i64 i = 0; i < rels.size(); i++) { - const ElfRel &rel = rels[i]; - if (rel.r_type == R_NONE || record_undef_error(ctx, rel)) - continue; - - Symbol &sym = *file.symbols[rel.r_sym]; - - if (sym.is_ifunc()) - Error(ctx) << sym << ": GNU ifunc symbol is not supported on Alpha"; - - switch (rel.r_type) { - case R_ALPHA_REFQUAD: - scan_dyn_absrel(ctx, sym, rel); - break; - case R_ALPHA_LITERAL: - if (rel.r_addend) - ctx.extra.got->add_symbol(sym, rel.r_addend); - else - sym.flags |= NEEDS_GOT; - break; - case R_ALPHA_SREL32: - scan_pcrel(ctx, sym, rel); - break; - case R_ALPHA_BRSGP: - if (sym.is_imported) - sym.flags |= NEEDS_PLT; - break; - case R_ALPHA_TLSGD: - sym.flags |= NEEDS_TLSGD; - break; - case R_ALPHA_TLSLDM: - ctx.needs_tlsld = true; - break; - case R_ALPHA_GOTTPREL: - sym.flags |= NEEDS_GOTTP; - break; - case R_ALPHA_TPRELHI: - case R_ALPHA_TPRELLO: - check_tlsle(ctx, sym, rel); - break; - case R_ALPHA_GPREL32: - case R_ALPHA_LITUSE: - case R_ALPHA_GPDISP: - case R_ALPHA_HINT: - case R_ALPHA_GPRELHIGH: - case R_ALPHA_GPRELLOW: - case R_ALPHA_DTPRELHI: - case R_ALPHA_DTPRELLO: - break; - default: - Fatal(ctx) << *this << ": unknown relocation: " << rel; - } - } -} - -// An R_ALPHA_LITERAL relocation may request the linker to create a GOT -// entry for an external symbol with a non-zero addend. 
This is an unusual -// request which is not found in any other targets. -// -// Referring an external symbol with a non-zero addend is a bad practice -// because we need to create as many dynamic relocations as the number of -// distinctive addends for the same symbol. -// -// We don't want to mess up the implementation of the common GOT section -// for Alpha. So we create another GOT-like section, .alpha_got. Any GOT -// entry for an R_ALPHA_LITERAL reloc with a non-zero addend is created -// not in .got but in .alpha_got. -// -// Since .alpha_got entries are accessed relative to GP, .alpha_got -// needs to be close enough to .got. It's actually placed next to .got. -void AlphaGotSection::add_symbol(Symbol &sym, i64 addend) { - assert(addend); - std::scoped_lock lock(mu); - entries.push_back({&sym, addend}); -} - -bool operator<(const AlphaGotSection::Entry &a, const AlphaGotSection::Entry &b) { - return std::tuple(a.sym->file->priority, a.sym->sym_idx, a.addend) < - std::tuple(b.sym->file->priority, b.sym->sym_idx, b.addend); -}; - -u64 AlphaGotSection::get_addr(Symbol &sym, i64 addend) { - auto it = std::lower_bound(entries.begin(), entries.end(), Entry{&sym, addend}); - assert(it != entries.end()); - return this->shdr.sh_addr + (it - entries.begin()) * sizeof(Word); -} - -i64 AlphaGotSection::get_reldyn_size(Context &ctx) const { - i64 n = 0; - for (const Entry &e : entries) - if (e.sym->is_imported || (ctx.arg.pic && !e.sym->is_absolute())) - n++; - return n; -} - -void AlphaGotSection::finalize() { - sort(entries); - remove_duplicates(entries); - shdr.sh_size = entries.size() * sizeof(Word); -} - -void AlphaGotSection::copy_buf(Context &ctx) { - ElfRel *dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - reldyn_offset); - - for (i64 i = 0; i < entries.size(); i++) { - Entry &e = entries[i]; - u64 P = this->shdr.sh_addr + sizeof(Word) * i; - ul64 *buf = (ul64 *)(ctx.buf + this->shdr.sh_offset + sizeof(Word) * i); - - if (e.sym->is_imported) { - *buf = 
ctx.arg.apply_dynamic_relocs ? e.addend : 0; - *dynrel++ = ElfRel(P, E::R_ABS, e.sym->get_dynsym_idx(ctx), e.addend); - } else { - *buf = e.sym->get_addr(ctx) + e.addend; - if (ctx.arg.pic && !e.sym->is_absolute()) - *dynrel++ = ElfRel(P, E::R_RELATIVE, 0, *buf); - } - } -} - -} // namespace mold::elf diff --git a/install-build-deps.sh b/install-build-deps.sh index 8e5568fa..4ef3bac1 100755 --- a/install-build-deps.sh +++ b/install-build-deps.sh @@ -19,15 +19,12 @@ ubuntu-* | pop-* | linuxmint-* | debian-* | raspbian-*) fedora-* | amzn-* | rhel-*) dnf install -y gcc-g++ cmake glibc-static libstdc++-static diffutils util-linux ;; -opensuse-leap-*) - zypper install -y make cmake gcc-c++ gcc11-c++ glibc-devel-static tar diffutils util-linux - ;; -opensuse-tumbleweed-*) +opensuse-*) zypper install -y make cmake gcc-c++ glibc-devel-static tar diffutils util-linux ;; gentoo-*) emerge-webrsync - emerge dev-build/cmake + FEATURES='getbinpkg binpkg-request-signature' emerge dev-build/cmake ;; arch-* | archarm-* | artix-* | endeavouros-*) pacman -Sy --needed --noconfirm base-devel cmake util-linux @@ -43,6 +40,13 @@ clear-linux-*) swupd update swupd bundle-add c-basic diffutils ;; +almalinux-*) + dnf install -y gcc-toolset-13-gcc-c++ gcc-toolset-13-libstdc++-devel cmake diffutils + ;; +freebsd-*) + pkg update + pkg install -y cmake bash binutils gcc + ;; *) echo "Error: don't know anything about build dependencies on $ID-$VERSION_ID" exit 1 diff --git a/install-cross-tools.sh b/install-cross-tools.sh index 86dc10dd..fcac8ef8 100755 --- a/install-cross-tools.sh +++ b/install-cross-tools.sh @@ -11,7 +11,7 @@ set -x case "$ID-$VERSION_ID" in ubuntu-* | pop-* | linuxmint-* | debian-* | raspbian-*) - apt-get install -y qemu-user {gcc,g++}-{i686,aarch64,riscv64,powerpc,powerpc64,powerpc64le,s390x,sparc64,m68k,sh4,alpha}-linux-gnu {gcc,g++}-arm-linux-gnueabihf + apt-get install -y qemu-user 
{gcc,g++}-{i686,aarch64,riscv64,powerpc,powerpc64,powerpc64le,s390x,sparc64,m68k,sh4}-linux-gnu {gcc,g++}-arm-linux-gnueabihf ;; *) echo "Error: don't know anything about build dependencies on $ID-$VERSION_ID" diff --git a/common/archive-file.h b/lib/archive-file.h similarity index 93% rename from common/archive-file.h rename to lib/archive-file.h index 11158f49..9ce4a030 100644 --- a/common/archive-file.h +++ b/lib/archive-file.h @@ -26,7 +26,6 @@ #pragma once #include "common.h" -#include "filetype.h" namespace mold { @@ -76,7 +75,7 @@ struct ArHdr { } }; -template +template std::vector read_thin_archive_members(Context &ctx, MappedFile *mf) { u8 *begin = mf->data; @@ -124,7 +123,7 @@ read_thin_archive_members(Context &ctx, MappedFile *mf) { return vec; } -template +template std::vector read_fat_archive_members(Context &ctx, MappedFile *mf) { u8 *begin = mf->data; u8 *data = begin + 8; @@ -162,16 +161,13 @@ std::vector read_fat_archive_members(Context &ctx, MappedFile *mf) return vec; } -template +template std::vector read_archive_members(Context &ctx, MappedFile *mf) { - switch (get_file_type(ctx, mf)) { - case FileType::AR: + std::string_view str = mf->get_contents(); + if (str.starts_with("!\n")) return read_fat_archive_members(ctx, mf); - case FileType::THIN_AR: - return read_thin_archive_members(ctx, mf); - default: - unreachable(); - } + assert(str.starts_with("!\n")); + return read_thin_archive_members(ctx, mf); } } // namespace mold diff --git a/common/common.h b/lib/common.h similarity index 90% rename from common/common.h rename to lib/common.h index 986448e1..d915c97e 100644 --- a/common/common.h +++ b/lib/common.h @@ -2,6 +2,7 @@ #include "integers.h" +#include #include #include #include @@ -61,8 +62,6 @@ namespace mold { using namespace std::literals::string_literals; using namespace std::literals::string_view_literals; -template class OutputFile; - inline char *output_tmpfile; inline u8 *output_buffer_start = nullptr; @@ -75,7 +74,7 @@ std::string 
get_self_path(); void cleanup(); void install_signal_handler(); -static u64 combine_hash(u64 a, u64 b) { +inline u64 combine_hash(u64 a, u64 b) { return a ^ (b + 0x9e3779b9 + (a << 6) + (a >> 2)); } @@ -442,10 +441,9 @@ inline i64 write_string(void *buf, std::string_view str) { } template -inline i64 write_vector(void *buf, const std::vector &vec) { - i64 sz = vec.size() * sizeof(T); - memcpy(buf, vec.data(), sz); - return sz; +inline void write_vector(void *buf, const std::vector &vec) { + if (!vec.empty()) + memcpy(buf, vec.data(), vec.size() * sizeof(T)); } inline void encode_uleb(std::vector &vec, u64 val) { @@ -525,23 +523,6 @@ inline void overwrite_uleb(u8 *loc, u64 val) { *loc = val & 0b0111'1111; } -template -std::string_view save_string(Context &ctx, const std::string &str) { - u8 *buf = new u8[str.size() + 1]; - memcpy(buf, str.data(), str.size()); - buf[str.size()] = '\0'; - ctx.string_pool.push_back(std::unique_ptr(buf)); - return {(char *)buf, str.size()}; -} - -inline bool remove_prefix(std::string_view &s, std::string_view prefix) { - if (s.starts_with(prefix)) { - s = s.substr(prefix.size()); - return true; - } - return false; -} - static inline void pause() { #if defined(__x86_64__) asm volatile("pause"); @@ -726,74 +707,6 @@ class ConcurrentMap { void get_random_bytes(u8 *buf, i64 size); -// -// output-file.h -// - -template -class OutputFile { -public: - static std::unique_ptr> - open(Context &ctx, std::string path, i64 filesize, i64 perm); - - virtual void close(Context &ctx) = 0; - virtual ~OutputFile() = default; - - u8 *buf = nullptr; - std::vector buf2; - std::string path; - i64 fd = -1; - i64 filesize = 0; - bool is_mmapped = false; - bool is_unmapped = false; - -protected: - OutputFile(std::string path, i64 filesize, bool is_mmapped) - : path(path), filesize(filesize), is_mmapped(is_mmapped) {} -}; - -template -class MallocOutputFile : public OutputFile { -public: - MallocOutputFile(Context &ctx, std::string path, i64 filesize, i64 perm) - 
: OutputFile(path, filesize, false), ptr(new u8[filesize]), - perm(perm) { - this->buf = ptr.get(); - } - - void close(Context &ctx) override { - Timer t(ctx, "close_file"); - FILE *fp; - - if (this->path == "-") { - fp = stdout; - } else { -#ifdef _WIN32 - int pmode = (perm & 0200) ? (_S_IREAD | _S_IWRITE) : _S_IREAD; - i64 fd = _open(this->path.c_str(), _O_RDWR | _O_CREAT | _O_BINARY, pmode); -#else - i64 fd = ::open(this->path.c_str(), O_RDWR | O_CREAT, perm); -#endif - if (fd == -1) - Fatal(ctx) << "cannot open " << this->path << ": " << errno_string(); -#ifdef _WIN32 - fp = _fdopen(fd, "wb"); -#else - fp = fdopen(fd, "w"); -#endif - } - - fwrite(this->buf, this->filesize, 1, fp); - if (!this->buf2.empty()) - fwrite(this->buf2.data(), this->buf2.size(), 1, fp); - fclose(fp); - } - -private: - std::unique_ptr ptr; - i64 perm; -}; - // // hyperloglog.cc // @@ -899,6 +812,13 @@ std::optional demangle_rust(std::string_view name); void acquire_global_lock(); void release_global_lock(); +// +// crc32.cc +// + +u32 compute_crc32(u32 crc, u8 *buf, i64 len); +std::vector crc32_solve(u32 current, u32 desired); + // // compress.cc // diff --git a/common/compress.cc b/lib/compress.cc similarity index 100% rename from common/compress.cc rename to lib/compress.cc diff --git a/common/config.h.in b/lib/config.h.in similarity index 100% rename from common/config.h.in rename to lib/config.h.in diff --git a/lib/crc32.cc b/lib/crc32.cc new file mode 100644 index 00000000..d3f71783 --- /dev/null +++ b/lib/crc32.cc @@ -0,0 +1,60 @@ +#include "common.h" + +#include +#include + +namespace mold { + +// This function "forges" a CRC. That is, given the current and a desired +// CRC32 value, crc32_solve() returns a binary blob to add to the end of +// the original data to yield the desired CRC. Trailing garbage is ignored +// by many binary file formats, so you can create a file with a desired +// CRC using crc32_solve(). We need it for --separate-debug-file. 
+std::vector crc32_solve(u32 current, u32 desired) { + constexpr u32 poly = 0xedb88320; + u32 x = ~desired; + + // Each iteration computes x = (x * x^-1) mod poly. + for (i64 i = 0; i < 32; i++) { + x = std::rotl(x, 1); + x ^= (x & 1) * (poly << 1); + } + + x ^= ~current; + + std::vector out(4); + out[0] = x; + out[1] = x >> 8; + out[2] = x >> 16; + out[3] = x >> 24; + return out; +} + +// Compute a CRC for given data in parallel +u32 compute_crc32(u32 crc, u8 *buf, i64 len) { + struct Shard { + u8 *buf; + i64 len; + u32 crc; + }; + + constexpr i64 shard_size = 1024 * 1024; // 1 MiB + std::vector shards; + + while (len > 0) { + i64 sz = std::min(len, shard_size); + shards.push_back({buf, sz, 0}); + buf += sz; + len -= sz; + } + + tbb::parallel_for_each(shards.begin(), shards.end(), [](Shard &shard) { + shard.crc = crc32(0, shard.buf, shard.len); + }); + + for (Shard &shard : shards) + crc = crc32_combine(crc, shard.crc, shard.len); + return crc; +} + +} // namespace mold diff --git a/common/demangle.cc b/lib/demangle.cc similarity index 100% rename from common/demangle.cc rename to lib/demangle.cc diff --git a/common/filepath.cc b/lib/filepath.cc similarity index 100% rename from common/filepath.cc rename to lib/filepath.cc diff --git a/test/gentoo-test.sh b/lib/gentoo-test.sh similarity index 89% rename from test/gentoo-test.sh rename to lib/gentoo-test.sh index fd5c4ca8..dbdae006 100755 --- a/test/gentoo-test.sh +++ b/lib/gentoo-test.sh @@ -26,12 +26,13 @@ if ! 
docker image ls mold-gentoo | grep -q mold-gentoo; then cat <> /etc/portage/make.conf && \ +RUN echo 'USE="X ssl elogind -systemd corefonts truetype jpeg jpeg2k tiff zstd static-libs binary -perl"' >> /etc/portage/make.conf && \ echo 'ACCEPT_KEYWORDS="~amd64"' >> /etc/portage/make.conf && \ echo 'ACCEPT_LICENSE="* -@EULA"' >> /etc/portage/make.conf && \ echo 'FEATURES="\${FEATURE} noclean nostrip ccache -ipc-sandbox -network-sandbox -pid-sandbox -sandbox"' >> /etc/portage/make.conf && \ - echo 'CCACHE_DIR="/ccache"' >> /etc/portage/make.conf -RUN emerge gdb lld clang vim emacs strace ccache xeyes dev-build/cmake dev-vcs/git && rm -rf /var/tmp/portage + echo 'CCACHE_DIR="/ccache"' >> /etc/portage/make.conf && \ + emerge gdb lld clang vim emacs strace ccache xeyes dev-build/cmake dev-vcs/git && \ + rm -rf /var/tmp/portage EOF set +e fi diff --git a/common/glob.cc b/lib/glob.cc similarity index 100% rename from common/glob.cc rename to lib/glob.cc diff --git a/common/hyperloglog.cc b/lib/hyperloglog.cc similarity index 100% rename from common/hyperloglog.cc rename to lib/hyperloglog.cc diff --git a/lib/integers.h b/lib/integers.h new file mode 100644 index 00000000..11582f70 --- /dev/null +++ b/lib/integers.h @@ -0,0 +1,144 @@ +// This file defines integral types for file input/output. We need to use +// these types instead of the plain integers (such as uint32_t or int32_t) +// when reading from/writing to an mmap'ed file area for the following +// reasons: +// +// 1. mold is always a cross linker and should not depend on what host it +// is running on. For example, users should be able to run mold on a +// big-endian SPARC machine to create a little-endian RV64 binary. +// +// 2. Even though data members in all ELF data structures are naturally +// aligned, they are not guaranteed to be aligned on memory because of +// archive files. 
Archive files (.a files) align each member only to a +// 2 byte boundary, so anything larger than 2 bytes may be misaligned +// in an mmap'ed memory. Misaligned access is an undefined behavior in +// C/C++, so we shouldn't cast an arbitrary pointer to a uint32_t, for +// example, to read a 32 bit value. +// +// The data types defined in this file don't depend on host byte order and +// don't do unaligned access. + +#pragma once + +#include +#include +#include + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __LITTLE_ENDIAN__ 1 +# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __BIG_ENDIAN__ 1 +# else +# error "unknown host byte order" +# endif +#endif + +namespace mold { + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; + +template +class Integer { +public: + constexpr Integer() = default; + + constexpr Integer(T x) requires (endian == std::endian::little && size == 2) + : buf{(u8)x, (u8)(x >> 8)} {} + + constexpr Integer(T x) requires (endian == std::endian::little && size == 3) + : buf{(u8)x, (u8)(x >> 8), (u8)(x >> 16)} {} + + constexpr Integer(T x) requires (endian == std::endian::little && size == 4) + : buf{(u8)x, (u8)(x >> 8), (u8)(x >> 16), (u8)(x >> 24)} {} + + constexpr Integer(T x) requires (endian == std::endian::little && size == 8) + : buf{(u8)x, (u8)(x >> 8), (u8)(x >> 16), (u8)(x >> 24), + (u8)(x >> 32), (u8)(x >> 40), (u8)(x >> 48), (u8)(x >> 56)} {} + + constexpr Integer(T x) requires (endian == std::endian::big && size == 2) + : buf{(u8)(x >> 8), (u8)x} {} + + constexpr Integer(T x) requires (endian == std::endian::big && size == 3) + : buf{(u8)(x >> 16), (u8)(x >> 8), (u8)x} {} + + constexpr Integer(T x) requires (endian == std::endian::big && size == 4) + : buf{(u8)(x >> 24), (u8)(x >> 16), (u8)(x >> 8), (u8)x} {} + + constexpr Integer(T 
x) requires (endian == std::endian::big && size == 8) + : buf{(u8)(x >> 56), (u8)(x >> 48), (u8)(x >> 40), (u8)(x >> 32), + (u8)(x >> 24), (u8)(x >> 16), (u8)(x >> 8), (u8)x} {} + + Integer &operator=(T x) { + new (this) Integer(x); + return *this; + } + + operator T() const { + if constexpr (endian == std::endian::little) { + if constexpr (size == 2) + return buf[1] << 8 | buf[0]; + else if constexpr (size == 3) + return buf[2] << 16 | buf[1] << 8 | buf[0]; + else if constexpr (size == 4) + return buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0]; + else + return (u64)buf[7] << 56 | (u64)buf[6] << 48 | + (u64)buf[5] << 40 | (u64)buf[4] << 32 | + (u64)buf[3] << 24 | (u64)buf[2] << 16 | + (u64)buf[1] << 8 | (u64)buf[0]; + } else { + if constexpr (size == 2) + return buf[0] << 8 | buf[1]; + else if constexpr (size == 3) + return buf[0] << 16 | buf[1] << 8 | buf[2]; + else if constexpr (size == 4) + return buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]; + else + return (u64)buf[0] << 56 | (u64)buf[1] << 48 | + (u64)buf[2] << 40 | (u64)buf[3] << 32 | + (u64)buf[4] << 24 | (u64)buf[5] << 16 | + (u64)buf[6] << 8 | (u64)buf[7]; + } + } + + Integer &operator++() { return *this = *this + 1; } + Integer operator++(int) { return ++*this - 1; } + Integer &operator--() { return *this = *this - 1; } + Integer operator--(int) { return --*this + 1; } + Integer &operator+=(T x) { return *this = *this + x; } + Integer &operator-=(T x) { return *this = *this - x; } + Integer &operator&=(T x) { return *this = *this & x; } + Integer &operator|=(T x) { return *this = *this | x; } + +private: + u8 buf[size]; +};; + +using il16 = Integer; +using il32 = Integer; +using il64 = Integer; + +using ul16 = Integer; +using ul24 = Integer; +using ul32 = Integer; +using ul64 = Integer; + +using ib16 = Integer; +using ib32 = Integer; +using ib64 = Integer; + +using ub16 = Integer; +using ub24 = Integer; +using ub32 = Integer; +using ub64 = Integer; + +} // namespace mold diff --git 
a/lib/jobs-unix.cc b/lib/jobs-unix.cc new file mode 100644 index 00000000..9912ab52 --- /dev/null +++ b/lib/jobs-unix.cc @@ -0,0 +1,50 @@ +// Many build systems attempt to invoke as many linker processes as there +// are cores, based on the assumption that the linker is single-threaded. +// However, since mold is multi-threaded, such build systems' behavior is +// not beneficial and just increases the overall peak memory usage. +// On machines with limited memory, this could lead to an out-of-memory +// error. +// +// This file implements a feature that limits the number of concurrent +// mold processes to just 1 for each user. It is intended to be used as +// `MOLD_JOBS=1 ninja` or `MOLD_JOBS=1 make -j$(nproc)`. + +#include "common.h" + +#include +#include +#include +#include +#include +#include + +namespace mold { + +static int lock_fd = -1; + +void acquire_global_lock() { + char *jobs = getenv("MOLD_JOBS"); + if (!jobs || jobs != "1"s) + return; + + std::string path; + if (char *dir = getenv("XDG_RUNTIME_DIR")) + path = dir + "/mold-lock"s; + else + path = "/tmp/mold-lock-"s + getpwuid(getuid())->pw_name; + + int fd = open(path.c_str(), O_WRONLY | O_CREAT | O_CLOEXEC, 0600); + if (fd == -1) + return; + + if (lockf(fd, F_LOCK, 0) == -1) + return; + lock_fd = fd; +} + +void release_global_lock() { + if (lock_fd != -1) + close(lock_fd); +} + +} // namespace mold diff --git a/common/jobs-win32.cc b/lib/jobs-win32.cc similarity index 100% rename from common/jobs-win32.cc rename to lib/jobs-win32.cc diff --git a/common/malloc.cc b/lib/malloc.cc similarity index 100% rename from common/malloc.cc rename to lib/malloc.cc diff --git a/common/mapped-file-unix.cc b/lib/mapped-file-unix.cc similarity index 100% rename from common/mapped-file-unix.cc rename to lib/mapped-file-unix.cc diff --git a/common/mapped-file-win32.cc b/lib/mapped-file-win32.cc similarity index 100% rename from common/mapped-file-win32.cc rename to lib/mapped-file-win32.cc diff --git 
a/common/multi-glob.cc b/lib/multi-glob.cc similarity index 100% rename from common/multi-glob.cc rename to lib/multi-glob.cc diff --git a/common/perf.cc b/lib/perf.cc similarity index 100% rename from common/perf.cc rename to lib/perf.cc diff --git a/common/random.cc b/lib/random.cc similarity index 100% rename from common/random.cc rename to lib/random.cc diff --git a/common/signal-unix.cc b/lib/signal-unix.cc similarity index 100% rename from common/signal-unix.cc rename to lib/signal-unix.cc diff --git a/common/signal-win32.cc b/lib/signal-win32.cc similarity index 100% rename from common/signal-win32.cc rename to lib/signal-win32.cc diff --git a/common/siphash.h b/lib/siphash.h similarity index 100% rename from common/siphash.h rename to lib/siphash.h diff --git a/common/tar.cc b/lib/tar.cc similarity index 98% rename from common/tar.cc rename to lib/tar.cc index 5c0692d4..30f464bc 100644 --- a/common/tar.cc +++ b/lib/tar.cc @@ -1,3 +1,5 @@ +// This file contains functions to create a tar file. 
+ #include "common.h" #ifdef _WIN32 diff --git a/common/update-git-hash.cmake b/lib/update-git-hash.cmake similarity index 100% rename from common/update-git-hash.cmake rename to lib/update-git-hash.cmake diff --git a/elf/arch-arm32.cc b/src/arch-arm32.cc similarity index 90% rename from elf/arch-arm32.cc rename to src/arch-arm32.cc index 291824b5..7ef37392 100644 --- a/elf/arch-arm32.cc +++ b/src/arch-arm32.cc @@ -37,7 +37,7 @@ #include #include -namespace mold::elf { +namespace mold { using E = ARM32; @@ -194,7 +194,7 @@ void write_addend(u8 *loc, i64 val, const ElfRel &rel) { template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { + constexpr ul32 insn[] = { 0xe52d'e004, // push {lr} 0xe59f'e004, // ldr lr, 2f 0xe08f'e00e, // 1: add lr, pc, lr @@ -209,7 +209,7 @@ void write_plt_header(Context &ctx, u8 *buf) { *(ul32 *)(buf + 16) = ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr - 16; } -static const ul32 plt_entry[] = { +constexpr ul32 plt_entry[] = { 0xe59f'c004, // 1: ldr ip, 2f 0xe08c'c00f, // add ip, ip, pc 0xe59c'f000, // ldr pc, [ip] @@ -256,11 +256,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - auto get_tls_trampoline_addr = [&, i = 0](u64 addr) mutable { for (; i < output_section->thunks.size(); i++) { i64 disp = output_section->shdr.sh_addr + output_section->thunks[i]->offset - @@ -299,7 +294,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { switch (rel.r_type) { case R_ARM_ABS32: case R_ARM_TARGET1: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_ARM_REL32: *(ul32 *)loc = S + A - P; @@ -489,19 +483,21 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { // .L2: .word foo + . 
- .L1 // R_ARM_TLS_GOTDESC // - // We may relax the instructions to the following for non-dlopen'd DSO + // We may relax the instructions to the following if its TP-relative + // address is known at link-time // // ldr r0, .L2 - // .L1: ldr r0, [pc, r0] + // .L1: nop // ... - // .L2: .word foo(gottpoff) + . - .L1 + // .L2: .word foo(tpoff) // - // or to the following for executable. + // or to the following if the TP-relative address is known at + // process startup time. // // ldr r0, .L2 - // .L1: nop + // .L1: ldr r0, [pc, r0] // ... - // .L2: .word foo(tpoff) + // .L2: .word foo(gottpoff) + . - .L1 if (sym.has_tlsdesc(ctx)) { // A is odd if the corresponding TLS_CALL is Thumb. *(ul32 *)loc = sym.get_tlsdesc_addr(ctx) - P + A - ((A & 1) ? 6 : 4); @@ -584,8 +580,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -600,12 +594,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_GOT | NEEDS_PLT; switch (rel.r_type) { - case R_ARM_ABS32: - case R_ARM_MOVT_ABS: - case R_ARM_THM_MOVT_ABS: - case R_ARM_TARGET1: - scan_dyn_absrel(ctx, sym, rel); - break; case R_ARM_MOVW_ABS_NC: case R_ARM_THM_MOVW_ABS_NC: scan_absrel(ctx, sym, rel); @@ -644,6 +632,10 @@ void InputSection::scan_relocations(Context &ctx) { case R_ARM_TLS_LE32: check_tlsle(ctx, sym, rel); break; + case R_ARM_ABS32: + case R_ARM_MOVT_ABS: + case R_ARM_THM_MOVT_ABS: + case R_ARM_TARGET1: case R_ARM_REL32: case R_ARM_BASE_PREL: case R_ARM_GOTOFF32: @@ -666,7 +658,7 @@ void Thunk::copy_buf(Context &ctx) { // TLS trampoline code. ARM32's TLSDESC is designed so that this // common piece of code is factored out from object files to reduce // output size. Since no one provide, the linker has to synthesize it. 
- static ul32 hdr[] = { + constexpr ul32 hdr[] = { 0xe08e'0000, // add r0, lr, r0 0xe590'1004, // ldr r1, [r0, #4] 0xe12f'ff11, // bx r1 @@ -675,7 +667,7 @@ void Thunk::copy_buf(Context &ctx) { // This is a range extension and mode switch thunk. // It has two entry points: +0 for Thumb and +4 for ARM. - const u8 entry[] = { + static const u8 entry[] = { // .thumb 0x78, 0x47, // bx pc # jumps to 1f 0xc0, 0x46, // nop @@ -708,6 +700,45 @@ u64 get_eflags(Context &ctx) { return EF_ARM_EABI_VER5; } +void create_arm_exidx_section(Context &ctx) { + for (i64 i = 0; i < ctx.chunks.size(); i++) { + OutputSection *osec = ctx.chunks[i]->to_osec(); + + if (osec && osec->shdr.sh_type == SHT_ARM_EXIDX) { + auto *sec = new Arm32ExidxSection(*osec); + ctx.extra.exidx = sec; + ctx.chunks[i] = sec; + ctx.chunk_pool.emplace_back(sec); + + for (InputSection *isec : osec->members) + isec->is_alive = false; + break; + } + } +} + +void Arm32ExidxSection::compute_section_size(Context &ctx) { + output_section.compute_section_size(ctx); + this->shdr.sh_size = output_section.shdr.sh_size; +} + +void Arm32ExidxSection::update_shdr(Context &ctx) { + // .ARM.exidx's sh_link should be set to the .text section index. + // Runtime doesn't care about it, but the binutils's strip command does. + if (Chunk *chunk = find_chunk(ctx, ".text")) + this->shdr.sh_link = chunk->shndx; +} + +void Arm32ExidxSection::remove_duplicate_entries(Context &ctx) { + this->shdr.sh_size = get_contents(ctx).size(); +} + +void Arm32ExidxSection::copy_buf(Context &ctx) { + std::vector contents = get_contents(ctx); + assert(this->shdr.sh_size == contents.size()); + write_vector(ctx.buf + this->shdr.sh_offset, contents); +} + // ARM executables use an .ARM.exidx section to look up an exception // handling record for the current instruction pointer. The table needs // to be sorted by their addresses. 
@@ -716,13 +747,12 @@ u64 get_eflags(Context &ctx) { // I don't know why only ARM uses the different mechanism, but it's // likely that it's due to some historical reason. // -// This function sorts .ARM.exidx records. -void fixup_arm_exidx_section(Context &ctx) { - Timer t(ctx, "fixup_arm_exidx_section"); +// This function returns contents of .ARM.exidx. +std::vector Arm32ExidxSection::get_contents(Context &ctx) { + std::vector buf(output_section.shdr.sh_size); - OutputSection *osec = find_section(ctx, SHT_ARM_EXIDX); - if (!osec) - return; + output_section.shdr.sh_addr = this->shdr.sh_addr; + output_section.write_to(ctx, buf.data(), nullptr); // .ARM.exidx records consists of a signed 31-bit relative address // and a 32-bit value. The relative address indicates the start @@ -736,24 +766,24 @@ void fixup_arm_exidx_section(Context &ctx) { // // CANTUNWIND is value 1. The most significant bit is set in (2) but // not in (3). So we can distinguished them just by looking at a value. - const u32 EXIDX_CANTUNWIND = 1; + const u32 CANTUNWIND = 1; struct Entry { ul32 addr; ul32 val; }; - if (osec->shdr.sh_size % sizeof(Entry)) + if (buf.size() % sizeof(Entry)) Fatal(ctx) << "invalid .ARM.exidx section size"; - Entry *ent = (Entry *)(ctx.buf + osec->shdr.sh_offset); - i64 num_entries = osec->shdr.sh_size / sizeof(Entry); + Entry *ent = (Entry *)buf.data(); + i64 num_entries = buf.size() / sizeof(Entry); // Entry's addresses are relative to themselves. In order to sort - // records by addresses, we first translate them so that the addresses + // records by address, we first translate them so that the addresses // are relative to the beginning of the section. 
auto is_relative = [](u32 val) { - return val != EXIDX_CANTUNWIND && !(val & 0x8000'0000); + return val != CANTUNWIND && !(val & 0x8000'0000); }; tbb::parallel_for((i64)0, num_entries, [&](i64 i) { @@ -763,10 +793,21 @@ void fixup_arm_exidx_section(Context &ctx) { ent[i].val = 0x7fff'ffff & (ent[i].val + offset); }); - tbb::parallel_sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { + std::sort(ent, ent + num_entries, [](const Entry &a, const Entry &b) { return a.addr < b.addr; }); + // Remove duplicate adjacent entries. That is, if two adjacent functions + // have the same compact unwind info or are both CANTUNWIND, we can + // merge them into a single address range. + auto it = std::unique(ent, ent + num_entries, + [](const Entry &a, const Entry &b) { + return a.val == b.val; + }); + + num_entries = it - ent; + buf.resize(num_entries * sizeof(Entry)); + // Make addresses relative to themselves. tbb::parallel_for((i64)0, num_entries, [&](i64 i) { i64 offset = sizeof(Entry) * i; @@ -775,14 +816,7 @@ void fixup_arm_exidx_section(Context &ctx) { ent[i].val = 0x7fff'ffff & (ent[i].val - offset); }); - // .ARM.exidx's sh_link should be set to the .text section index. - // Runtime doesn't care about it, but the binutils's strip command does. - if (ctx.shdr) { - if (Chunk *text = find_section(ctx, ".text")) { - osec->shdr.sh_link = text->shndx; - ctx.shdr->copy_buf(ctx); - } - } + return buf; } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-arm64.cc b/src/arch-arm64.cc similarity index 96% rename from elf/arch-arm64.cc rename to src/arch-arm64.cc index 90e0bd71..6fc237b8 100644 --- a/elf/arch-arm64.cc +++ b/src/arch-arm64.cc @@ -19,7 +19,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = ARM64; @@ -46,7 +46,7 @@ static u64 page(u64 val) { template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { + constexpr ul32 insn[] = { 0xa9bf'7bf0, // stp x16, x30, [sp,#-16]! 
0x9000'0010, // adrp x16, .got.plt[2] 0xf940'0211, // ldr x17, [x16, .got.plt[2]] @@ -68,7 +68,7 @@ void write_plt_header(Context &ctx, u8 *buf) { template <> void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const ul32 insn[] = { + constexpr ul32 insn[] = { 0x9000'0010, // adrp x16, .got.plt[n] 0xf940'0211, // ldr x17, [x16, .got.plt[n]] 0x9100'0210, // add x16, x16, .got.plt[n] @@ -86,7 +86,7 @@ void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { template <> void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym) { - static const ul32 insn[] = { + constexpr ul32 insn[] = { 0x9000'0010, // adrp x16, GOT[n] 0xf940'0211, // ldr x17, [x16, GOT[n]] 0xd61f'0220, // br x17 @@ -145,11 +145,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -173,7 +168,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { switch (rel.r_type) { case R_AARCH64_ABS64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_AARCH64_LDST8_ABS_LO12_NC: case R_AARCH64_ADD_ABS_LO12_NC: @@ -383,19 +377,21 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { // blr x1 // R_AARCH64_TLSDESC_CALL foo // - // We may relax the instructions to the following for non-dlopen'd DSO + // We may relax the instructions to the following if its TP-relative + // address is known at link-time // // nop // nop - // adrp x0, :gottprel:foo - // ldr x0, [x0, :gottprel_lo12:foo] + // movz x0, :tls_offset_hi:foo, lsl #16 + // movk x0, :tls_offset_lo:foo // - // or to the following for executable. + // or to the following if the TP-relative address is known at + // process startup time. 
// // nop // nop - // movz x0, :tls_offset_hi:foo, lsl #16 - // movk x0, :tls_offset_lo:foo + // adrp x0, :gottprel:foo + // ldr x0, [x0, :gottprel_lo12:foo] if (sym.has_tlsdesc(ctx)) { i64 val = page(sym.get_tlsdesc_addr(ctx) + A) - page(P); check(val, -(1LL << 32), 1LL << 32); @@ -488,8 +484,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -505,9 +499,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_GOT | NEEDS_PLT; switch (rel.r_type) { - case R_AARCH64_ABS64: - scan_dyn_absrel(ctx, sym, rel); - break; case R_AARCH64_MOVW_UABS_G3: scan_absrel(ctx, sym, rel); break; @@ -567,6 +558,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: check_tlsle(ctx, sym, rel); break; + case R_AARCH64_ABS64: case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_ADR_PREL_LO21: case R_AARCH64_CONDBR19: @@ -603,7 +595,7 @@ void InputSection::scan_relocations(Context &ctx) { template <> void Thunk::copy_buf(Context &ctx) { - static const ul32 insn[] = { + constexpr ul32 insn[] = { 0x9000'0010, // adrp x16, 0 # R_AARCH64_ADR_PREL_PG_HI21 0x9100'0210, // add x16, x16 # R_AARCH64_ADD_ABS_LO12_NC 0xd61f'0200, // br x16 @@ -626,4 +618,4 @@ void Thunk::copy_buf(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-i386.cc b/src/arch-i386.cc similarity index 95% rename from elf/arch-i386.cc rename to src/arch-i386.cc index 1494d98d..008faaf0 100644 --- a/elf/arch-i386.cc +++ b/src/arch-i386.cc @@ -35,7 +35,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = I386; @@ -226,7 +226,7 @@ static void relax_gd_to_le(u8 *loc, ElfRel rel, u64 val) { } // Relax LD to LE -static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { +static void 
relax_ld_to_le(u8 *loc, ElfRel rel, u64 tls_size) { switch (rel.r_type) { case R_386_PLT32: case R_386_PC32: { @@ -235,7 +235,7 @@ static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { 0x2d, 0, 0, 0, 0, // sub $tls_size, %eax }; memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; + *(ul32 *)(loc + 5) = tls_size; break; } case R_386_GOT32: @@ -246,7 +246,7 @@ static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 val) { 0x90, // nop }; memcpy(loc - 2, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = val; + *(ul32 *)(loc + 5) = tls_size; break; } default: @@ -286,11 +286,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -322,7 +317,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { *(ul16 *)loc = S + A; break; case R_386_32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_386_PC8: check(S + A - P, -(1 << 7), 1 << 7); @@ -374,7 +368,7 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_386_TLS_LDM: if (ctx.got->has_tlsld(ctx)) *(ul32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - else + else relax_ld_to_le(loc, rels[++i], ctx.tp_addr - ctx.tls_begin); break; case R_386_TLS_LDO_32: @@ -392,14 +386,16 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { // call *(%eax) // R_386_TLS_DESC_CALL foo // - // We may relax the instructions to the following for non-dlopen'd DSO + // We may relax the instructions to the following if its TP-relative + // address is known at link-time // - // mov foo@GOTTPOFF(%ebx), %eax + // mov $foo@TPOFF, %eax // nop // - // or to the following for executable. + // or to the following if the TP-relative address is known at + // process startup time. 
// - // mov $foo@TPOFF, %eax + // mov foo@GOTTPOFF(%ebx), %eax // nop // // We allow the following alternative code sequence too because @@ -518,8 +514,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -549,9 +543,6 @@ void InputSection::scan_relocations(Context &ctx) { case R_386_16: scan_absrel(ctx, sym, rel); break; - case R_386_32: - scan_dyn_absrel(ctx, sym, rel); - break; case R_386_PC8: case R_386_PC16: case R_386_PC32: @@ -581,8 +572,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_386_TLS_GD: // We always relax if -static because libc.a doesn't contain // __tls_get_addr(). - if ((ctx.arg.relax && sym.is_tprel_linktime_const(ctx)) || - ctx.arg.is_static) + if (ctx.arg.static_ || (ctx.arg.relax && sym.is_tprel_linktime_const(ctx))) i++; else sym.flags |= NEEDS_TLSGD; @@ -590,7 +580,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_386_TLS_LDM: // We always relax if -static because libc.a doesn't contain // __tls_get_addr(). 
- if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) + if (ctx.arg.static_ || (ctx.arg.relax && !ctx.arg.shared)) i++; else ctx.needs_tlsld = true; @@ -601,6 +591,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_386_TLS_LE: check_tlsle(ctx, sym, rel); break; + case R_386_32: case R_386_GOTOFF: case R_386_TLS_LDO_32: case R_386_SIZE32: @@ -612,4 +603,4 @@ void InputSection::scan_relocations(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-loongarch.cc b/src/arch-loongarch.cc similarity index 53% rename from elf/arch-loongarch.cc rename to src/arch-loongarch.cc index 54ce7e44..dda138e9 100644 --- a/elf/arch-loongarch.cc +++ b/src/arch-loongarch.cc @@ -10,21 +10,22 @@ // bootstrapping the entire ecosystem for LoongArch, sending patches to // Linux, GCC, LLVM, etc. // -// All instructions are 4 bytes long in LoongArch and aligned to 4-byte -// boundaries. It has 32 general-purpose registers. Among these, $t0 - $t8 -// (aliases for $r12 - $r20) are temporary registers that we can use in -// our PLT and range extension thunks. +// Speaking of the ISA, all instructions are 4 byte long and aligned to 4 +// byte boundaries in LoongArch. It has 32 general-purpose registers. +// Among these, $t0 - $t8 (aliases for $r12 - $r20) are temporary +// registers that we can use in our PLT. // -// The psABI defines a few linker relaxations. We haven't supported them -// yet. +// Just like RISC-V, LoongArch supports section-shrinking relaxations. +// That is, it allows linkers to rewrite certain instruction sequences to +// shorter ones. Sections are not an atomic unit of copying. 
// -// https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html +// https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc #if MOLD_LOONGARCH64 || MOLD_LOONGARCH32 #include "mold.h" -namespace mold::elf { +namespace mold { using E = MOLD_TARGET; @@ -50,7 +51,7 @@ static u64 hi20(u64 val, u64 pc) { return bits(page(val + 0x800) - page(pc), 31, 12); } -static u64 hi64(u64 val, u64 pc) { +static u64 higher20(u64 val, u64 pc) { // A PC-relative 64-bit address is materialized with the following // instructions for the large code model: // @@ -64,21 +65,15 @@ static u64 hi64(u64 val, u64 pc) { // ADDI.D adds a sign-extended 12 bit value to a register. LU32I.D and // LU52I.D simply set bits to [51:31] and to [63:53], respectively. // - // Compensating all the sign-extensions is a bit complicated. - u64 x = page(val) - page(pc); - if (val & 0x800) - x += 0x1000 - 0x1'0000'0000; - if (x & 0x8000'0000) - x += 0x1'0000'0000; - return x; -} - -static u64 higher20(u64 val, u64 pc) { - return bits(hi64(val, pc), 51, 32); + // Compensating all the sign-extensions is a bit complicated. The + // psABI gave the following formula. + val = val + 0x8000'0000 + ((val & 0x800) ? (0x1000 - 0x1'0000'0000) : 0); + return bits(page(val) - page(pc - 8), 51, 32); } static u64 highest12(u64 val, u64 pc) { - return bits(hi64(val, pc), 63, 52); + val = val + 0x8000'0000 + ((val & 0x800) ? 
(0x1000 - 0x1'0000'0000) : 0); + return bits(page(val) - page(pc - 12), 63, 52); } static void write_k12(u8 *loc, u32 val) { @@ -113,9 +108,47 @@ static void write_d10k16(u8 *loc, u32 val) { *(ul32 *)loc |= bits(val, 25, 16); } +static u32 get_rd(u32 insn) { + return bits(insn, 4, 0); +} + +static u32 get_rj(u32 insn) { + return bits(insn, 9, 5); +} + +static void set_rj(u8 *loc, u32 rj) { + assert(rj < 32); + *(ul32 *)loc &= 0b111111'1111111111111111'00000'11111; + *(ul32 *)loc |= rj << 5; +} + +// Returns true if isec's i'th relocation refers to the following +// relaxable instructioon pair. +// +// pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20 +// ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12 +static bool is_relaxable_got_load(Context &ctx, InputSection &isec, i64 i) { + std::span> rels = isec.get_rels(ctx); + Symbol &sym = *isec.file.symbols[rels[i].r_sym]; + + if (ctx.arg.relax && + sym.is_pcrel_linktime_const(ctx) && + i + 3 < rels.size() && + rels[i + 2].r_type == R_LARCH_GOT_PC_LO12 && + rels[i + 2].r_offset == rels[i].r_offset + 4 && + rels[i + 3].r_type == R_LARCH_RELAX) { + u32 insn1 = *(ul32 *)(isec.contents.data() + rels[i].r_offset); + u32 insn2 = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4); + bool is_ld_d = (insn2 & 0xffc0'0000) == 0x28c0'0000; + return get_rd(insn1) == get_rd(insn2) && get_rd(insn2) == get_rj(insn2) && + is_ld_d; + } + return false; +} + template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn_64[] = { + constexpr ul32 insn_64[] = { 0x1a00'000e, // pcalau12i $t2, %pc_hi20(.got.plt) 0x0011'bdad, // sub.d $t1, $t1, $t3 0x28c0'01cf, // ld.d $t3, $t2, %lo12(.got.plt) # _dl_runtime_resolve @@ -126,7 +159,7 @@ void write_plt_header(Context &ctx, u8 *buf) { 0x4c00'01e0, // jr $t3 }; - static const ul32 insn_32[] = { + constexpr ul32 insn_32[] = { 0x1a00'000e, // pcalau12i $t2, %pc_hi20(.got.plt) 0x0011'3dad, // sub.w $t1, $t1, $t3 0x2880'01cf, // ld.w $t3, $t2, %lo12(.got.plt) # _dl_runtime_resolve @@ -146,18 
+179,18 @@ void write_plt_header(Context &ctx, u8 *buf) { write_k12(buf + 16, gotplt); } -static const ul32 plt_entry_64[] = { +constexpr ul32 plt_entry_64[] = { 0x1a00'000f, // pcalau12i $t3, %pc_hi20(func@.got.plt) 0x28c0'01ef, // ld.d $t3, $t3, %lo12(func@.got.plt) 0x4c00'01ed, // jirl $t1, $t3, 0 - 0x0340'0000, // nop + 0x002a'0000, // break }; -static const ul32 plt_entry_32[] = { +constexpr ul32 plt_entry_32[] = { 0x1a00'000f, // pcalau12i $t3, %pc_hi20(func@.got.plt) 0x2880'01ef, // ld.w $t3, $t3, %lo12(func@.got.plt) 0x4c00'01ed, // jirl $t1, $t3, 0 - 0x0340'0000, // nop + 0x002a'0000, // break }; template <> @@ -233,10 +266,9 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); + auto get_r_delta = [&](i64 idx) { + return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx]; + }; for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; @@ -247,7 +279,9 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { continue; Symbol &sym = *file.symbols[rel.r_sym]; - u8 *loc = base + rel.r_offset; + i64 r_offset = rel.r_offset - get_r_delta(i); + i64 removed_bytes = get_r_delta(i + 1) - get_r_delta(i); + u8 *loc = base + r_offset; auto check = [&](i64 val, i64 lo, i64 hi) { if (val < lo || hi <= val) @@ -268,32 +302,28 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { // ones. Therefore, G may refer to a TLSGD or a regular GOT slot // depending on the symbol type. // - // Note that as of August 2023, both GCC and Clang treat TLSLD relocs - // as if they were TLSGD relocs for LoongArch, which is a clear bug. - // We need to handle TLSLD relocs as synonyms for TLSGD relocs for the - // sake of bug compatibility. 
- auto get_got_idx = [&] { - if (sym.has_tlsgd(ctx)) - return sym.get_tlsgd_idx(ctx); - return sym.get_got_idx(ctx); - }; + // Note that even though LoongArch defines relocations for TLSLD, TLSLD + // is not actually supported on it. GCC and LLVM emit identical machine + // code for -ftls-model=global-dynamic and -ftls-model=local-dynamic, + // and we need to handle TLSLD relocations as equivalent to TLSGD + // relocations. This is clearly a compiler bug, but it's too late to + // fix. The only way to fix it would be to define a new set of + // relocations for true TLSLD and deprecate the current ones. But it + // appears that migrating to TLSDESC is a better choice, so it's + // unlikely to happen. + i64 got_idx = + sym.has_tlsgd(ctx) ? sym.get_tlsgd_idx(ctx) : sym.get_got_idx(ctx); u64 S = sym.get_addr(ctx); u64 A = rel.r_addend; - u64 P = get_addr() + rel.r_offset; - u64 G = get_got_idx() * sizeof(Word); + u64 P = get_addr() + r_offset; + u64 G = got_idx * sizeof(Word); u64 GOT = ctx.got->shdr.sh_addr; switch (rel.r_type) { case R_LARCH_32: if constexpr (E::is_64) *(ul32 *)loc = S + A; - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); - break; - case R_LARCH_64: - assert(E::is_64); - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_LARCH_B16: check_branch(S + A - P, -(1 << 17), 1 << 17); @@ -303,13 +333,10 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { check_branch(S + A - P, -(1 << 22), 1 << 22); write_d5k16(loc, (S + A - P) >> 2); break; - case R_LARCH_B26: { - i64 val = S + A - P; - if (val < -(1 << 27) || (1 << 27) <= val) - val = get_thunk_addr(i) + A - P; - write_d10k16(loc, val >> 2); + case R_LARCH_B26: + check_branch(S + A - P, -(1 << 27), 1 << 27); + write_d10k16(loc, (S + A - P) >> 2); break; - } case R_LARCH_ABS_LO12: write_k12(loc, S + A); break; @@ -333,7 +360,15 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { write_k12(loc, S + A); break; case R_LARCH_PCALA_HI20: - 
write_j20(loc, hi20(S + A, P)); + if (removed_bytes == 0) { + write_j20(loc, hi20(S + A, P)); + } else { + // Rewrite pcalau12i + addi.d with pcaddi + assert(removed_bytes == 4); + *(ul32 *)loc = 0x1800'0000 | get_rd(*(ul32 *)loc); // pcaddi + write_j20(loc, (S + A - P) >> 2); + i += 3; + } break; case R_LARCH_PCALA64_LO20: write_j20(loc, higher20(S + A, P)); @@ -345,7 +380,37 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { write_k12(loc, GOT + G + A); break; case R_LARCH_GOT_PC_HI20: - write_j20(loc, hi20(GOT + G + A, P)); + if (removed_bytes == 0) { + // If the PC-relative symbol address is known at link-time, we can + // rewrite the following GOT load + // + // pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20 + // ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12 + // + // with the following address materialization + // + // pcalau12i $t0, 0 + // addi.d $t0, $t0, 0 + if (is_relaxable_got_load(ctx, *this, i)) { + i64 dist = compute_distance(ctx, sym, *this, rel); + if (-(1LL << 31) <= dist && dist < (1LL << 31)) { + u32 rd = get_rd(*(ul32 *)loc); + *(ul32 *)(loc + 4) = 0x02c0'0000 | (rd << 5) | rd; // addi.d + + write_j20(loc, hi20(S + A, P)); + write_k12(loc + 4, S + A); + i += 3; + break; + } + } + write_j20(loc, hi20(GOT + G + A, P)); + } else { + // Rewrite pcalau12i + ld.d with pcaddi + assert(removed_bytes == 4); + *(ul32 *)loc = 0x1800'0000 | get_rd(*(ul32 *)loc); // pcaddi + write_j20(loc, (S + A - P) >> 2); + i += 3; + } break; case R_LARCH_GOT64_PC_LO20: write_j20(loc, higher20(GOT + G + A, P)); @@ -401,13 +466,13 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_LARCH_TLS_IE64_HI12: write_k12(loc, (sym.get_gottp_addr(ctx) + A) >> 52); break; - case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: + case R_LARCH_TLS_LD_PC_HI20: check(sym.get_tlsgd_addr(ctx) + A - P, -(1LL << 31), 1LL << 31); write_j20(loc, hi20(sym.get_tlsgd_addr(ctx) + A, P)); break; - case R_LARCH_TLS_LD_HI20: case R_LARCH_TLS_GD_HI20: + case 
R_LARCH_TLS_LD_HI20: write_j20(loc, (sym.get_tlsgd_addr(ctx) + A) >> 12); break; case R_LARCH_ADD6: @@ -446,12 +511,148 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_LARCH_64_PCREL: *(ul64 *)loc = S + A - P; break; + case R_LARCH_CALL36: + if (removed_bytes == 0) { + write_j20(loc, (S + A - P + 0x20000) >> 18); + write_k16(loc + 4, (S + A - P) >> 2); + } else { + // Rewrite PCADDU18I + JIRL to B or BL + assert(removed_bytes == 4); + if (get_rd(*(ul32 *)(contents.data() + rel.r_offset + 4)) == 0) + *(ul32 *)loc = 0x5000'0000; // B + else + *(ul32 *)loc = 0x5400'0000; // BL + write_d10k16(loc, (S + A - P) >> 2); + } + break; case R_LARCH_ADD_ULEB128: overwrite_uleb(loc, read_uleb(loc) + S + A); break; case R_LARCH_SUB_ULEB128: overwrite_uleb(loc, read_uleb(loc) - S - A); break; + case R_LARCH_TLS_DESC_PC_HI20: + // LoongArch TLSDESC uses the following code sequence to materialize + // a TP-relative address in a0. + // + // pcalau12i $a0, 0 + // R_LARCH_TLS_DESC_PC_HI20 foo + // addi.[dw] $a0, $a0, 0 + // R_LARCH_TLS_DESC_PC_LO12 foo + // ld.d $ra, $a0, 0 + // R_LARCH_TLS_DESC_LD foo + // jirl $ra, $ra, 0 + // R_LARCH_TLS_DESC_CALL foo + // + // We may relax the instructions to the following if its TP-relative + // address is known at link-time + // + // + // + // lu12i.w $a0, foo@TPOFF + // addi.w $a0, $a0, foo@TPOFF + // + // or to the following if the TP offset is small enough. + // + // + // + // + // ori $a0, $zero, foo@TPOFF + // + // If the TP-relative address is known at process startup time, we + // may relax the instructions to the following. + // + // + // + // pcalau12i $a0, foo@GOTTP + // ld.[dw] $a0, $a0, foo@GOTTP + // + // If we don't know anything about the symbol, we can still relax + // the first two instructions to a single pcaddi as shown below. 
+ // + // + // pcaddi $a0, foo@GOTDESC + // ld.d $ra, $a0, 0 + // jirl $ra, $ra, 0 + // + // Note that if section-shrinking relaxation is enabled, nop may be + // completely deleted. + if (removed_bytes == 0) { + if (sym.has_tlsdesc(ctx)) { + i64 dist = sym.get_tlsdesc_addr(ctx) + A - P; + if (ctx.arg.relax && -(1 << 21) <= dist && dist < (1 << 21)) { + *(ul32 *)loc = 0x0340'0000; // nop + } else { + write_j20(loc, hi20(sym.get_tlsdesc_addr(ctx) + A, P)); + } + } else { + *(ul32 *)loc = 0x0340'0000; // nop + } + } + break; + case R_LARCH_TLS_DESC_PC_LO12: + if (removed_bytes == 0) { + if (sym.has_tlsdesc(ctx)) { + i64 dist = sym.get_tlsdesc_addr(ctx) + A - P; + if (ctx.arg.relax && -(1 << 21) <= dist && dist < (1 << 21)) { + // If we can directly materialize the PC-relative address + // with pcaddi, do that. + *(ul32 *)loc = 0x1800'0000 | get_rd(*(ul32 *)loc); // pcaddi + write_j20(loc, dist >> 2); + } else { + write_k12(loc, sym.get_tlsdesc_addr(ctx) + A); + } + } else { + *(ul32 *)loc = 0x0340'0000; // nop + } + } + break; + case R_LARCH_TLS_DESC_LD: + if (sym.has_tlsdesc(ctx) || removed_bytes == 4) { + // Do nothing + } else if (sym.has_gottp(ctx)) { + *(ul32 *)loc = 0x1a00'0004; // pcalau12i $a0, 0 + write_j20(loc, hi20(sym.get_gottp_addr(ctx) + A, P)); + } else { + *(ul32 *)loc = 0x1400'0004; // lu12i.w $a0, 0 + write_j20(loc, (S + A + 0x800 - ctx.tp_addr) >> 12); + } + break; + case R_LARCH_TLS_DESC_CALL: + if (sym.has_tlsdesc(ctx)) { + // Do nothing + } else if (sym.has_gottp(ctx)) { + if (E::is_64) + *(ul32 *)loc = 0x28c0'0084; // ld.d $a0, $a0, 0 + else + *(ul32 *)loc = 0x2880'0084; // ld.w $a0, $a0, 0 + write_k12(loc, sym.get_gottp_addr(ctx) + A); + } else { + i64 val = S + A - ctx.tp_addr; + if (val < 0x1000) + *(ul32 *)loc = 0x0380'0004; // ori $a0, $zero, 0 + else + *(ul32 *)loc = 0x0280'0084; // addi.w $a0, $a0, 0 + write_k12(loc, val); + } + break; + case R_LARCH_TLS_LE_HI20_R: + if (removed_bytes == 0) + write_j20(loc, (S + A + 0x800 - ctx.tp_addr) 
>> 12); + break; + case R_LARCH_TLS_LE_LO12_R: { + i64 val = S + A - ctx.tp_addr; + write_k12(loc, val); + + // Rewrite `addi.d $t0, $t0, ` with `addi.d $t0, $tp, ` + // if the offset is directly accessible using tp. tp is r2. + if (sign_extend(val, 11) == val) + set_rj(loc, 2); + break; + } + case R_LARCH_64: + case R_LARCH_TLS_LE_ADD_R: + break; default: unreachable(); } @@ -551,8 +752,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -576,15 +775,10 @@ void InputSection::scan_relocations(Context &ctx) { case R_LARCH_32: if constexpr (E::is_64) scan_absrel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); - break; - case R_LARCH_64: - assert(E::is_64); - scan_dyn_absrel(ctx, sym, rel); break; case R_LARCH_B26: case R_LARCH_PCALA_HI20: + case R_LARCH_CALL36: if (sym.is_imported) sym.flags |= NEEDS_PLT; break; @@ -596,10 +790,10 @@ void InputSection::scan_relocations(Context &ctx) { case R_LARCH_TLS_IE_PC_HI20: sym.flags |= NEEDS_GOTTP; break; - case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_GD_PC_HI20: - case R_LARCH_TLS_LD_HI20: + case R_LARCH_TLS_LD_PC_HI20: case R_LARCH_TLS_GD_HI20: + case R_LARCH_TLS_LD_HI20: sym.flags |= NEEDS_TLSGD; break; case R_LARCH_32_PCREL: @@ -610,8 +804,14 @@ void InputSection::scan_relocations(Context &ctx) { case R_LARCH_TLS_LE_LO12: case R_LARCH_TLS_LE64_LO20: case R_LARCH_TLS_LE64_HI12: + case R_LARCH_TLS_LE_HI20_R: + case R_LARCH_TLS_LE_LO12_R: check_tlsle(ctx, sym, rel); break; + case R_LARCH_TLS_DESC_CALL: + scan_tlsdesc(ctx, sym); + break; + case R_LARCH_64: case R_LARCH_B16: case R_LARCH_B21: case R_LARCH_ABS_HI20: @@ -645,6 +845,10 @@ void InputSection::scan_relocations(Context &ctx) { case R_LARCH_SUB64: case R_LARCH_ADD_ULEB128: case R_LARCH_SUB_ULEB128: + case 
R_LARCH_TLS_DESC_PC_HI20: + case R_LARCH_TLS_DESC_PC_LO12: + case R_LARCH_TLS_DESC_LD: + case R_LARCH_TLS_LE_ADD_R: break; default: Error(ctx) << *this << ": unknown relocation: " << rel; @@ -653,29 +857,157 @@ void InputSection::scan_relocations(Context &ctx) { } template <> -void Thunk::copy_buf(Context &ctx) { - static const ul32 insn[] = { - 0x1e00'000c, // pcaddu18i $t0, 0 - 0x4c00'0180, // jirl $zero, $t0, 0 - }; +void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { + std::span> rels = isec.get_rels(ctx); + isec.extra.r_deltas.resize(rels.size() + 1); + i64 delta = 0; - static_assert(E::thunk_size == sizeof(insn)); - - u8 *buf = ctx.buf + output_section.shdr.sh_offset + offset; - u64 P = output_section.shdr.sh_addr + offset; + for (i64 i = 0; i < rels.size(); i++) { + const ElfRel &r = rels[i]; + Symbol &sym = *isec.file.symbols[r.r_sym]; + isec.extra.r_deltas[i] = delta; + + // A R_LARCH_ALIGN relocation refers to the beginning of a nop + // sequence. We need to remove some or all of them so that the + // instruction that immediately follows that is aligned to a specified + // boundary. To allow that, a R_LARCH_ALIGN relocation that requests + // 2^n alignment refers to 2^n - 4 bytes of nop instructions. + if (r.r_type == R_LARCH_ALIGN) { + // The actual rule for storing the alignment size is a bit weird. + // In particular, the most significant 56 bits of r_addend is + // sometimes used to store the upper limit of the alignment, + // allowing the instruction that follows nops _not_ to be aligned at + // all. I think that's a spec bug, so we don't want to support that. 
+ i64 alignment; + if (r.r_sym) { + if (r.r_addend >> 8) + Fatal(ctx) << isec << ": ternary R_LARCH_ALIGN is not supported: " << i; + alignment = 1 << r.r_addend; + } else { + if (!has_single_bit(r.r_addend + 4)) + Fatal(ctx) << isec << ": R_LARCH_ALIGN: invalid alignment requirement: " + << i; + alignment = r.r_addend + 4; + } + + u64 loc = isec.get_addr() + r.r_offset - delta; + u64 next_loc = loc + alignment - 4; + delta += next_loc - align_to(loc, alignment); + continue; + } - for (Symbol *sym : symbols) { - u64 S = sym->get_addr(ctx); + // Handling other relocations is optional. + if (!ctx.arg.relax || i == rels.size() - 1 || + rels[i + 1].r_type != R_LARCH_RELAX) + continue; - memcpy(buf, insn, sizeof(insn)); - write_j20(buf, (S - P + 0x20000) >> 18); - write_k16(buf + 4, (S - P) >> 2); + // Skip linker-synthesized symbols because their final addresses + // are not fixed yet. + if (sym.file == ctx.internal_obj) + continue; - buf += sizeof(insn); - P += sizeof(insn); + switch (r.r_type) { + case R_LARCH_TLS_LE_HI20_R: + case R_LARCH_TLS_LE_ADD_R: + // LoongArch uses the following three instructions to access + // TP ± 2 GiB. + // + // lu12i.w $t0, 0 # R_LARCH_TLS_LE_HI20_R + // add.d $t0, $t0, $tp # R_LARCH_TLS_LE_ADD_R + // addi.d $t0, $t0, 0 # R_LARCH_TLS_LE_LO12_R + // + // If the thread-local variable is within TP ± 2 KiB, we can + // relax them into the following single instruction. + // + // addi.d $t0, $tp, + if (i64 val = sym.get_addr(ctx) + r.r_addend - ctx.tp_addr; + sign_extend(val, 11) == val) + delta += 4; + break; + case R_LARCH_PCALA_HI20: + // The following two instructions are used to materialize a + // PC-relative address with a 32 bit displacement. + // + // pcalau12i $t0, 0 # R_LARCH_PCALA_HI20 + // addi.d $t0, $t0, 0 # R_LARCH_PCALA_LO12 + // + // If the displacement is within ±2 MiB, we can relax them to + // the following instruction. 
+ // + // pcaddi $t0, + if (i + 3 < rels.size() && + rels[i + 2].r_type == R_LARCH_PCALA_LO12 && + rels[i + 2].r_offset == rels[i].r_offset + 4 && + rels[i + 3].r_type == R_LARCH_RELAX) { + i64 dist = compute_distance(ctx, sym, isec, r); + u32 insn1 = *(ul32 *)(isec.contents.data() + rels[i].r_offset); + u32 insn2 = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4); + bool is_addi_d = (insn2 & 0xffc0'0000) == 0x02c0'0000; + + if (dist % 4 == 0 && -(1 << 21) <= dist && dist < (1 << 21) && + is_addi_d && get_rd(insn1) == get_rd(insn2) && + get_rd(insn2) == get_rj(insn2)) + delta += 4; + } + break; + case R_LARCH_CALL36: + // A CALL36 relocation referes to the following instruction pair + // to jump to PC ± 128 GiB. + // + // pcaddu18i $t0, 0 # R_LARCH_CALL36 + // jirl $zero/$ra, $t0, 0 + // + // If the displacement is PC ± 128 MiB, we can use B or BL instead. + // Note that $zero is $r0 and $ra is $r1. + if (i64 dist = compute_distance(ctx, sym, isec, r); + -(1 << 27) <= dist && dist < (1 << 27)) + if (u32 jirl = *(ul32 *)(isec.contents.data() + rels[i].r_offset + 4); + get_rd(jirl) == 0 || get_rd(jirl) == 1) + delta += 4; + break; + case R_LARCH_GOT_PC_HI20: + // The following two instructions are used to load a symbol address + // from the GOT. + // + // pcalau12i $t0, 0 # R_LARCH_GOT_PC_HI20 + // ld.d $t0, $t0, 0 # R_LARCH_GOT_PC_LO12 + // + // If the PC-relative symbol address is known at link-time, we can + // relax them to the following instruction. 
+ // + // pcaddi $t0, + if (is_relaxable_got_load(ctx, isec, i)) { + i64 dist = compute_distance(ctx, sym, isec, r); + if (dist % 4 == 0 && -(1 << 21) <= dist && dist < (1 << 21)) + delta += 4; + } + break; + case R_LARCH_TLS_DESC_PC_HI20: + if (sym.has_tlsdesc(ctx)) { + u64 P = isec.get_addr() + r.r_offset; + i64 dist = sym.get_tlsdesc_addr(ctx) + r.r_addend - P; + if (-(1 << 21) <= dist && dist < (1 << 21)) + delta += 4; + } else { + delta += 4; + } + break; + case R_LARCH_TLS_DESC_PC_LO12: + if (!sym.has_tlsdesc(ctx)) + delta += 4; + break; + case R_LARCH_TLS_DESC_LD: + if (!sym.has_tlsdesc(ctx) && !sym.has_gottp(ctx) && + sym.get_addr(ctx) + r.r_addend - ctx.tp_addr < 0x1000) + delta += 4; + break; + } } + + isec.extra.r_deltas[rels.size()] = delta; + isec.sh_size -= delta; } -} // namespace mold::elf +} // namespace mold #endif diff --git a/elf/arch-m68k.cc b/src/arch-m68k.cc similarity index 95% rename from elf/arch-m68k.cc rename to src/arch-m68k.cc index f9de3be0..edffe048 100644 --- a/elf/arch-m68k.cc +++ b/src/arch-m68k.cc @@ -16,7 +16,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = M68K; @@ -78,11 +78,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -126,7 +121,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { switch (rel.r_type) { case R_68K_32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_68K_16: write16(S + A); @@ -251,8 +245,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = 
get_rels(ctx); for (i64 i = 0; i < rels.size(); i++) { @@ -266,9 +258,6 @@ void InputSection::scan_relocations(Context &ctx) { Error(ctx) << sym << ": GNU ifunc symbol is not supported on m68k"; switch (rel.r_type) { - case R_68K_32: - scan_dyn_absrel(ctx, sym, rel); - break; case R_68K_16: case R_68K_8: scan_absrel(ctx, sym, rel); @@ -312,6 +301,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_68K_TLS_LE8: check_tlsle(ctx, sym, rel); break; + case R_68K_32: case R_68K_TLS_LDO32: case R_68K_TLS_LDO16: case R_68K_TLS_LDO8: @@ -322,4 +312,4 @@ void InputSection::scan_relocations(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-ppc32.cc b/src/arch-ppc32.cc similarity index 96% rename from elf/arch-ppc32.cc rename to src/arch-ppc32.cc index 3bc0db6c..4525e73d 100644 --- a/elf/arch-ppc32.cc +++ b/src/arch-ppc32.cc @@ -42,7 +42,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = PPC32; @@ -54,7 +54,7 @@ static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ub32 insn[] = { + constexpr ub32 insn[] = { // Get the address of this PLT section 0x7c08'02a6, // mflr r0 0x429f'0005, // bcl 20, 31, 4 @@ -88,7 +88,7 @@ void write_plt_header(Context &ctx, u8 *buf) { loc[5] |= lo(ctx.gotplt->shdr.sh_addr - ctx.plt->shdr.sh_addr + 4); } -static const ub32 plt_entry[] = { +constexpr ub32 plt_entry[] = { // Get the address of this PLT entry 0x7c08'02a6, // mflr r0 0x429f'0005, // bcl 20, 31, 4 @@ -148,11 +148,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - u64 GOT2 = file.extra.got2 ? 
file.extra.got2->get_addr() : 0; for (i64 i = 0; i < rels.size(); i++) { @@ -170,10 +165,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { u64 GOT = ctx.got->shdr.sh_addr; switch (rel.r_type) { - case R_PPC_ADDR32: - case R_PPC_UADDR32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); - break; case R_PPC_ADDR14: *(ub32 *)loc |= bits(S + A, 15, 2) << 2; break; @@ -275,6 +266,8 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_PPC_GOT_TPREL16: *(ub16 *)loc = sym.get_gottp_addr(ctx) - GOT; break; + case R_PPC_ADDR32: + case R_PPC_UADDR32: case R_PPC_TLS: case R_PPC_TLSGD: case R_PPC_TLSLD: @@ -323,8 +316,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -339,10 +330,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_GOT | NEEDS_PLT; switch (rel.r_type) { - case R_PPC_ADDR32: - case R_PPC_UADDR32: - scan_dyn_absrel(ctx, sym, rel); - break; case R_PPC_ADDR14: case R_PPC_ADDR16: case R_PPC_UADDR16: @@ -391,6 +378,8 @@ void InputSection::scan_relocations(Context &ctx) { case R_PPC_TPREL16_HA: check_tlsle(ctx, sym, rel); break; + case R_PPC_ADDR32: + case R_PPC_UADDR32: case R_PPC_LOCAL24PC: case R_PPC_TLS: case R_PPC_TLSGD: @@ -409,7 +398,7 @@ void InputSection::scan_relocations(Context &ctx) { template <> void Thunk::copy_buf(Context &ctx) { - static const ub32 local_thunk[] = { + constexpr ub32 local_thunk[] = { // Get this thunk's address 0x7c08'02a6, // mflr r0 0x429f'0005, // bcl 20, 31, 4 @@ -450,4 +439,4 @@ void Thunk::copy_buf(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-ppc64v1.cc b/src/arch-ppc64v1.cc similarity index 96% rename from elf/arch-ppc64v1.cc rename to src/arch-ppc64v1.cc index cef71954..e3ec1c55 
100644 --- a/elf/arch-ppc64v1.cc +++ b/src/arch-ppc64v1.cc @@ -50,7 +50,7 @@ #include #include -namespace mold::elf { +namespace mold { using E = PPC64V1; @@ -68,7 +68,7 @@ static u64 higha(u64 x) { return ((x + 0x8000) >> 16) & 0xffff; } // resolved addresses. template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ub32 insn[] = { + constexpr ub32 insn[] = { 0x7d88'02a6, // mflr r12 0x429f'0005, // bcl 20, 31, 4 // obtain PC 0x7d68'02a6, // mflr r11 @@ -101,7 +101,7 @@ void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { // call to the PLT entry jumps to. So we need to strictly follow the PLT // section layout as the loader expect it to be. if (idx < 0x8000) { - static const ub32 insn[] = { + constexpr ub32 insn[] = { 0x3800'0000, // li r0, PLT_INDEX 0x4b00'0000, // b plt0 }; @@ -110,7 +110,7 @@ void write_plt_entry(Context &ctx, u8 *buf, Symbol &sym) { loc[0] |= idx; loc[1] |= (ctx.plt->shdr.sh_addr - sym.get_plt_addr(ctx) - 4) & 0x00ff'ffff; } else { - static const ub32 insn[] = { + constexpr ub32 insn[] = { 0x3c00'0000, // lis r0, PLT_INDEX@high 0x6000'0000, // ori r0, r0, PLT_INDEX@lo 0x4b00'0000, // b plt0 @@ -154,11 +154,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -182,11 +177,7 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { u64 TOC = ctx.extra.TOC->value; switch (rel.r_type) { - case R_PPC64_ADDR64: - apply_toc_rel(ctx, sym, rel, loc, S, A, P, &dynrel); - break; case R_PPC64_TOC: - apply_toc_rel(ctx, *ctx.extra.TOC, rel, loc, TOC, A, P, &dynrel); break; case R_PPC64_TOC16_HA: *(ub16 *)loc = ha(S + A - TOC); @@ -277,6 +268,7 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case 
R_PPC64_GOT_TPREL16_LO_DS: *(ub16 *)loc |= (sym.get_gottp_addr(ctx) - TOC) & 0xfffc; break; + case R_PPC64_ADDR64: case R_PPC64_PLTSEQ: case R_PPC64_PLTCALL: case R_PPC64_TLS: @@ -341,8 +333,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -362,10 +352,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_PPC_OPD; switch (rel.r_type) { - case R_PPC64_ADDR64: - case R_PPC64_TOC: - scan_toc_rel(ctx, sym, rel); - break; case R_PPC64_GOT_TPREL16_HA: sym.flags |= NEEDS_GOTTP; break; @@ -387,6 +373,8 @@ void InputSection::scan_relocations(Context &ctx) { case R_PPC64_TPREL16_LO_DS: check_tlsle(ctx, sym, rel); break; + case R_PPC64_ADDR64: + case R_PPC64_TOC: case R_PPC64_REL32: case R_PPC64_REL64: case R_PPC64_TOC16_HA: @@ -421,7 +409,7 @@ void Thunk::copy_buf(Context &ctx) { // If the destination is .plt.got, we save the current r2, read an // address of a function descriptor from .got, restore %r2 and jump // to the function. - static const ub32 pltgot_thunk[] = { + constexpr ub32 pltgot_thunk[] = { // Store the caller's %r2 0xf841'0028, // std %r2, 40(%r1) @@ -439,7 +427,7 @@ void Thunk::copy_buf(Context &ctx) { }; // If the destination is .plt, read a function descriptor from .got.plt. - static const ub32 plt_thunk[] = { + constexpr ub32 plt_thunk[] = { // Store the caller's %r2 0xf841'0028, // std %r2, 40(%r1) @@ -458,7 +446,7 @@ void Thunk::copy_buf(Context &ctx) { // If the destination is a non-imported function, we directly jump // to the function entry address. 
- static const ub32 local_thunk[] = { + constexpr ub32 local_thunk[] = { 0x3d82'0000, // addis r12, r2, foo@toc@ha 0x398c'0000, // addi r12, r12, foo@toc@lo 0x7d89'03a6, // mtctr r12 @@ -689,4 +677,4 @@ void PPC64OpdSection::copy_buf(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-ppc64v2.cc b/src/arch-ppc64v2.cc similarity index 96% rename from elf/arch-ppc64v2.cc rename to src/arch-ppc64v2.cc index 15f855af..78456fdb 100644 --- a/elf/arch-ppc64v2.cc +++ b/src/arch-ppc64v2.cc @@ -82,7 +82,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = PPC64V2; @@ -106,7 +106,7 @@ static void write34(u8 *loc, u64 x) { // resolved addresses. template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn[] = { + constexpr ul32 insn[] = { // Get PC 0x7c08'02a6, // mflr r0 0x429f'0005, // bcl 20, 31, 4 // obtain PC @@ -186,11 +186,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -210,12 +205,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { auto no_r2save_thunk_addr = [&] { return get_thunk_addr(i) + 8; }; switch (rel.r_type) { - case R_PPC64_ADDR64: - if (name() == ".toc") - apply_toc_rel(ctx, sym, rel, loc, S, A, P, &dynrel); - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); - break; case R_PPC64_TOC16_HA: *(ul16 *)loc = ha(S + A - TOC); break; @@ -337,6 +326,7 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_PPC64_TPREL34: write34(loc, S + A - ctx.tp_addr); break; + case R_PPC64_ADDR64: case R_PPC64_PLTSEQ: case R_PPC64_PLTSEQ_NOTOC: case R_PPC64_PLTCALL: @@ -403,8 +393,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, 
u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -419,12 +407,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_GOT | NEEDS_PLT; switch (rel.r_type) { - case R_PPC64_ADDR64: - if (name() == ".toc") - scan_toc_rel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); - break; case R_PPC64_GOT_TPREL16_HA: case R_PPC64_GOT_TPREL_PCREL34: sym.flags |= NEEDS_GOTTP; @@ -458,6 +440,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_PPC64_TPREL34: check_tlsle(ctx, sym, rel); break; + case R_PPC64_ADDR64: case R_PPC64_REL32: case R_PPC64_REL64: case R_PPC64_TOC16_HA: @@ -495,7 +478,7 @@ template <> void Thunk::copy_buf(Context &ctx) { // If the destination is PLT, we read an address from .got.plt or .got // and jump there. - static const ul32 plt_thunk[] = { + constexpr ul32 plt_thunk[] = { 0xf841'0018, // std r2, 24(r1) 0x6000'0000, // nop 0x3d82'0000, // addis r12, r2, foo@gotplt@toc@ha @@ -504,7 +487,7 @@ void Thunk::copy_buf(Context &ctx) { 0x4e80'0420, // bctr }; - static const ul32 plt_thunk_power10[] = { + constexpr ul32 plt_thunk_power10[] = { 0xf841'0018, // std r2, 24(r1) 0x6000'0000, // nop 0x0410'0000, // pld r12, foo@gotplt@pcrel @@ -515,7 +498,7 @@ void Thunk::copy_buf(Context &ctx) { // If the destination is a non-imported function, we directly jump // to its local entry point. 
- static const ul32 local_thunk[] = { + constexpr ul32 local_thunk[] = { 0xf841'0018, // std r2, 24(r1) 0x6000'0000, // nop 0x3d82'0000, // addis r12, r2, foo@toc@ha @@ -524,7 +507,7 @@ void Thunk::copy_buf(Context &ctx) { 0x4e80'0420, // bctr }; - static const ul32 local_thunk_power10[] = { + constexpr ul32 local_thunk_power10[] = { 0xf841'0018, // std r2, 24(r1) 0x6000'0000, // nop 0x0610'0000, // pla r12, foo@pcrel @@ -677,4 +660,4 @@ u64 get_eflags(Context &ctx) { return 2; } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-riscv.cc b/src/arch-riscv.cc similarity index 83% rename from elf/arch-riscv.cc rename to src/arch-riscv.cc index 7e7f618b..fd600b61 100644 --- a/elf/arch-riscv.cc +++ b/src/arch-riscv.cc @@ -12,71 +12,20 @@ // From the linker's point of view, the RISC-V's psABI is unique because // sections in input object files can be shrunk while being copied to the // output file. That is contrary to other psABIs in which sections are an -// atomic unit of copying. Let me explain it in more details. -// -// Since RISC-V instructions are 16-bit or 32-bit long, there's no way to -// embed a very large immediate into a branch instruction. In fact, JAL -// (jump and link) instruction can jump to only within PC ± 1 MiB because -// its immediate is only 21 bits long. If the destination is out of its -// reach, we need to use two instructions instead; the first instruction -// being AUIPC which sets upper 20 bits to a register and the second being -// JALR with a 12-bit immediate and the register. Combined, they specify a -// 32 bits displacement. -// -// Other RISC ISAs have the same limitation, and they solved the problem by -// letting the linker create so-called "range extension thunks". It works as -// follows: the compiler optimistically emits single jump instructions for -// function calls. 
If the linker finds that a branch target is out of reach, -// it emits a small piece of machine code near the branch instruction and -// redirect the branch to the linker-synthesized code. The code constructs a -// full 32-bit address in a register and jump to the destination. That -// linker-synthesized code is called "range extension thunks" or just -// "thunks". -// -// The RISC-V psABI is unique that it works the other way around. That is, -// for RISC-V, the compiler always emits two instructions (AUIPC + JAL) for -// function calls. If the linker finds the destination is reachable with a -// single instruction, it replaces the two instructions with the one and -// shrink the section size by one instruction length, instead of filling the -// gap with a nop. -// -// With the presence of this relaxation, sections can no longer be -// considered as an atomic unit. If we delete 4 bytes from the middle of a -// section, all contents after that point needs to be shifted by 4. Symbol -// values and relocation offsets have to be adjusted accordingly if they -// refer to past the deleted bytes. -// -// In mold, we use `r_deltas` to memorize how many bytes have be adjusted -// for relocations. For symbols, we directly mutate their `value` member. -// -// RISC-V object files tend to have way more relocations than those for -// other targets. This is because all branches, including ones that jump -// within the same section, are explicitly expressed with relocations. -// Here is why we need them: all control-flow statements such as `if` or -// `for` are implemented using branch instructions. For other targets, the -// compiler doesn't emit relocations for such branches because they know -// at compile-time exactly how many bytes has to be skipped. That's not -// true to RISC-V because the linker may delete bytes between a branch and -// its destination. Therefore, all branches including in-section ones have -// to be explicitly expressed with relocations. 
-// -// Note that this mechanism only shrink sections and never enlarge, as -// the compiler always emits the longest instruction sequence. This -// makes the linker implementation a bit simpler because we don't need -// to worry about oscillation. +// atomic unit of copying. See file comments in shrink-sections.cc for +// details. // // https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc #if MOLD_RV64LE || MOLD_RV64BE || MOLD_RV32LE || MOLD_RV32BE -#include "elf.h" #include "mold.h" #include #include #include -namespace mold::elf { +namespace mold { using E = MOLD_TARGET; @@ -141,7 +90,7 @@ static void set_rs1(u8 *loc, u32 rs1) { template <> void write_plt_header(Context &ctx, u8 *buf) { - static const ul32 insn_64[] = { + constexpr ul32 insn_64[] = { 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 0x0003'be03, // ld t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve @@ -152,7 +101,7 @@ void write_plt_header(Context &ctx, u8 *buf) { 0x000e'0067, // jr t3 }; - static const ul32 insn_32[] = { + constexpr ul32 insn_32[] = { 0x0000'0397, // auipc t2, %pcrel_hi(.got.plt) 0x41c3'0333, // sub t1, t1, t3 # .plt entry + hdr + 12 0x0003'ae03, // lw t3, %pcrel_lo(1b)(t2) # _dl_runtime_resolve @@ -172,14 +121,14 @@ void write_plt_header(Context &ctx, u8 *buf) { write_itype(buf + 16, gotplt - plt); } -static const ul32 plt_entry_64[] = { +constexpr ul32 plt_entry_64[] = { 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) 0x000e'3e03, // ld t3, %pcrel_lo(1b)(t3) 0x000e'0367, // jalr t1, t3 0x0010'0073, // ebreak }; -static const ul32 plt_entry_32[] = { +constexpr ul32 plt_entry_32[] = { 0x0000'0e17, // auipc t3, %pcrel_hi(function@.got.plt) 0x000e'2e03, // lw t3, %pcrel_lo(1b)(t3) 0x000e'0367, // jalr t1, t3 @@ -261,11 +210,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); u64 GP = ctx.__global_pointer ? 
ctx.__global_pointer->get_addr(ctx) : 0; - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - auto get_r_delta = [&](i64 idx) { return extra.r_deltas.empty() ? 0 : extra.r_deltas[idx]; }; @@ -316,12 +260,8 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_RISCV_32: if constexpr (E::is_64) *(U32 *)loc = S + A; - else - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_RISCV_64: - assert(E::is_64); - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_RISCV_BRANCH: check(S + A - P, -(1 << 12), 1 << 12); @@ -764,38 +704,9 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { } } -template <> -void InputSection::copy_contents_riscv(Context &ctx, u8 *buf) { - // If a section is not relaxed, we can copy it as a one big chunk. - if (extra.r_deltas.empty()) { - copy_contents(ctx, buf); - return; - } - - // A relaxed section is copied piece-wise. 
- std::span> rels = get_rels(ctx); - i64 pos = 0; - - for (i64 i = 0; i < rels.size(); i++) { - i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i]; - if (delta == 0) - continue; - assert(delta > 0); - - const ElfRel &r = rels[i]; - memcpy(buf, contents.data() + pos, r.r_offset - pos); - buf += r.r_offset - pos; - pos = r.r_offset + delta; - } - - memcpy(buf, contents.data() + pos, contents.size() - pos); -} - template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -813,17 +724,10 @@ void InputSection::scan_relocations(Context &ctx) { case R_RISCV_32: if constexpr (E::is_64) scan_absrel(ctx, sym, rel); - else - scan_dyn_absrel(ctx, sym, rel); break; case R_RISCV_HI20: scan_absrel(ctx, sym, rel); break; - case R_RISCV_64: - if constexpr (!E::is_64) - Error(ctx) << *this << ": R_RISCV_64 cannot be used on RV32"; - scan_dyn_absrel(ctx, sym, rel); - break; case R_RISCV_CALL: case R_RISCV_CALL_PLT: case R_RISCV_PLT32: @@ -856,6 +760,7 @@ void InputSection::scan_relocations(Context &ctx) { if (ctx.arg.shared) Error(ctx) << *this << ": R_RISCV_GPREL_HI20 may not be used with -shared"; break; + case R_RISCV_64: case R_RISCV_BRANCH: case R_RISCV_JAL: case R_RISCV_PCREL_LO12_I: @@ -918,34 +823,9 @@ u64 get_eflags(Context &ctx) { return ret; } -static bool is_resizable(InputSection *isec) { - return isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) && - (isec->shdr().sh_flags & SHF_EXECINSTR); -} - -// Returns the distance between a relocated place and a symbol. -static i64 compute_distance(Context &ctx, Symbol &sym, - InputSection &isec, const ElfRel &rel) { - // We handle absolute symbols as if they were infinitely far away - // because `shrink_section` may increase a distance between a branch - // instruction and an absolute symbol. 
Branching to an absolute - // location is extremely rare in real code, though. - if (sym.is_absolute()) - return INT32_MAX; - - // Likewise, relocations against weak undefined symbols won't be relaxed. - if (sym.esym().is_undef_weak()) - return INT32_MAX; - - // Compute a distance between the relocated place and the symbol. - i64 S = sym.get_addr(ctx); - i64 A = rel.r_addend; - i64 P = isec.get_addr() + rel.r_offset; - return S + A - P; -} - -// Scan relocations to shrink sections. -static void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { +// Scan relocations to a given shrink section. +template <> +void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) { std::span> rels = isec.get_rels(ctx); isec.extra.r_deltas.resize(rels.size() + 1); @@ -1126,55 +1006,6 @@ static void shrink_section(Context &ctx, InputSection &isec, bool use_rvc) isec.sh_size -= delta; } -// Shrink sections by interpreting relocations. -// -// This operation seems to be optional, because by default longest -// instructions are being used. However, calling this function is actually -// mandatory because of R_RISCV_ALIGN. R_RISCV_ALIGN is a directive to the -// linker to align the location referred to by the relocation to a -// specified byte boundary. We at least have to interpret them to satisfy -// the alignment constraints. -template <> -i64 riscv_resize_sections(Context &ctx) { - Timer t(ctx, "riscv_resize_sections"); - - // True if we can use the 2-byte instructions. This is usually true on - // Unix because RV64GC is generally considered the baseline hardware. - bool use_rvc = get_eflags(ctx) & EF_RISCV_RVC; - - // Find all the relocations that can be relaxed. - // This step should only shrink sections. - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (std::unique_ptr> &isec : file->sections) - if (is_resizable(isec.get())) - shrink_section(ctx, *isec, use_rvc); - }); - - // Fix symbol values. 
- tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (Symbol *sym : file->symbols) { - if (sym->file != file) - continue; - - InputSection *isec = sym->get_input_section(); - if (!isec || isec->extra.r_deltas.empty()) - continue; - - std::span> rels = isec->get_rels(ctx); - auto it = std::lower_bound(rels.begin(), rels.end(), sym->value, - [&](const ElfRel &r, u64 val) { - return r.r_offset < val; - }); - - sym->value -= isec->extra.r_deltas[it - rels.begin()]; - } - }); - - // Re-compute section offset again to finalize them. - compute_section_sizes(ctx); - return set_osec_offsets(ctx); -} - // ISA name handlers // // An example of ISA name is "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0". @@ -1190,8 +1021,8 @@ i64 riscv_resize_sections(Context &ctx) { // Each extension consists of a name, a major version and a minor version. // For example, "m2p0" indicates the "m" extension of version 2.0. "p" is // just a separator. Versions are often omitted in documents, but they are -// mandatory in .riscv.attributes. Likewise, abbreviations as "g" (which -// is short for "IMAFD") are not allowed in .riscv.attributes. +// mandatory in .riscv.attributes. Likewise, abbreviations such as "G" +// (which is short for "IMAFD") are not allowed in .riscv.attributes. // // Each RISC-V object file contains an ISA string enumerating extensions // used by the object file. 
We need to merge input objects' ISA strings @@ -1384,6 +1215,6 @@ void RiscvAttributesSection::copy_buf(Context &ctx) { write_vector(ctx.buf + this->shdr.sh_offset, contents); } -} // namespace mold::elf +} // namespace mold #endif diff --git a/elf/arch-s390x.cc b/src/arch-s390x.cc similarity index 94% rename from elf/arch-s390x.cc rename to src/arch-s390x.cc index 5fe7539d..dedc607c 100644 --- a/elf/arch-s390x.cc +++ b/src/arch-s390x.cc @@ -37,7 +37,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = S390X; @@ -116,11 +116,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -153,7 +148,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { switch (rel.r_type) { case R_390_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_390_8: check(S + A, 0, 1 << 8); @@ -256,7 +250,7 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { *(ub32 *)loc = (GOT + A - P) >> 1; break; case R_390_GOTENT: - check(GOT + G + A - P, -(1LL << 32), 1LL << 32); + check_dbl(GOT + G + A - P, -(1LL << 32), 1LL << 32); *(ub32 *)loc = (GOT + G + A - P) >> 1; break; case R_390_TLS_LE32: @@ -303,22 +297,14 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_390_TLS_LDM32: if (ctx.got->has_tlsld(ctx)) *(ub32 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; + else + *(ub32 *)loc = ctx.dtp_addr - ctx.tp_addr; break; case R_390_TLS_LDM64: if (ctx.got->has_tlsld(ctx)) *(ub64 *)loc = ctx.got->get_tlsld_addr(ctx) + A - GOT; - break; - case R_390_TLS_LDO32: - if (ctx.got->has_tlsld(ctx)) - *(ub32 *)loc = S + A - ctx.dtp_addr; - else - *(ub32 *)loc = S + A - ctx.tp_addr; - break; - case R_390_TLS_LDO64: - if 
(ctx.got->has_tlsld(ctx)) - *(ub64 *)loc = S + A - ctx.dtp_addr; else - *(ub64 *)loc = S + A - ctx.tp_addr; + *(ub64 *)loc = ctx.dtp_addr - ctx.tp_addr; break; case R_390_TLS_LDCALL: if (!ctx.got->has_tlsld(ctx)) { @@ -327,6 +313,12 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { memcpy(loc, insn, sizeof(insn)); } break; + case R_390_TLS_LDO32: + *(ub32 *)loc = S + A - ctx.dtp_addr; + break; + case R_390_TLS_LDO64: + *(ub64 *)loc = S + A - ctx.dtp_addr; + break; default: unreachable(); } @@ -385,8 +377,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -401,9 +391,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_GOT | NEEDS_PLT; switch (rel.r_type) { - case R_390_64: - scan_dyn_absrel(ctx, sym, rel); - break; case R_390_8: case R_390_12: case R_390_16: @@ -457,8 +444,7 @@ void InputSection::scan_relocations(Context &ctx) { // We always want to relax calls to __tls_get_offset() in statically- // linked executables because __tls_get_offset() in libc.a just calls // abort(). 
- if ((ctx.arg.relax && sym.is_tprel_linktime_const(ctx)) || - ctx.arg.is_static) { + if (ctx.arg.static_ || (ctx.arg.relax && sym.is_tprel_linktime_const(ctx))) { // Do nothing } else if (ctx.arg.relax && sym.is_tprel_runtime_const(ctx)) { sym.flags |= NEEDS_GOTTP; @@ -468,7 +454,7 @@ void InputSection::scan_relocations(Context &ctx) { break; case R_390_TLS_LDM32: case R_390_TLS_LDM64: - if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) { + if (ctx.arg.static_ || (ctx.arg.relax && !ctx.arg.shared)) { // Do nothing } else { ctx.needs_tlsld = true; @@ -478,6 +464,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_390_TLS_LE64: check_tlsle(ctx, sym, rel); break; + case R_390_64: case R_390_TLS_LDO32: case R_390_TLS_LDO64: case R_390_TLS_GDCALL: @@ -489,4 +476,4 @@ void InputSection::scan_relocations(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-sh4.cc b/src/arch-sh4.cc similarity index 96% rename from elf/arch-sh4.cc rename to src/arch-sh4.cc index bf307048..8e5d336a 100644 --- a/elf/arch-sh4.cc +++ b/src/arch-sh4.cc @@ -60,7 +60,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = SH4; @@ -230,11 +230,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -251,7 +246,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { switch (rel.r_type) { case R_SH_DIR32: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_SH_REL32: case R_SH_PLT32: @@ -323,8 +317,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset 
= file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); for (i64 i = 0; i < rels.size(); i++) { @@ -338,9 +330,6 @@ void InputSection::scan_relocations(Context &ctx) { Error(ctx) << sym << ": GNU ifunc symbol is not supported on sh4"; switch (rel.r_type) { - case R_SH_DIR32: - scan_dyn_absrel(ctx, sym, rel); - break; case R_SH_REL32: scan_pcrel(ctx, sym, rel); break; @@ -363,6 +352,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_SH_TLS_LE_32: check_tlsle(ctx, sym, rel); break; + case R_SH_DIR32: case R_SH_GOTPC: case R_SH_GOTOFF: case R_SH_TLS_LDO_32: @@ -373,4 +363,4 @@ void InputSection::scan_relocations(Context &ctx) { } } -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-sparc64.cc b/src/arch-sparc64.cc similarity index 84% rename from elf/arch-sparc64.cc rename to src/arch-sparc64.cc index bebbe11d..b04bb301 100644 --- a/elf/arch-sparc64.cc +++ b/src/arch-sparc64.cc @@ -58,7 +58,7 @@ #include "mold.h" -namespace mold::elf { +namespace mold { using E = SPARC64; @@ -142,11 +142,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -169,9 +164,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { u64 GOT = ctx.got->shdr.sh_addr; switch (rel.r_type) { - case R_SPARC_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); - break; case R_SPARC_5: check(S + A, 0, 1 << 5); *(ub32 *)loc |= bits(S + A, 4, 0); @@ -359,27 +351,75 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { *(ub32 *)loc |= bits(S + A, 11, 0); break; case R_SPARC_TLS_GD_HI22: - *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10); + if (sym.has_tlsgd(ctx)) { + *(ub32 *)loc |= 
bits(sym.get_tlsgd_addr(ctx) + A - GOT, 31, 10); + } else if (sym.has_gottp(ctx)) { + *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 31, 10); + } else { + *(ub32 *)loc |= bits(~(S + A - ctx.tp_addr), 31, 10); + } break; case R_SPARC_TLS_GD_LO10: - *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0); + if (sym.has_tlsgd(ctx)) { + *(ub32 *)loc |= bits(sym.get_tlsgd_addr(ctx) + A - GOT, 9, 0); + } else if (sym.has_gottp(ctx)) { + u32 rd = bits(*(ub32 *)loc, 29, 25); + *(ub32 *)loc = 0x8010'2000 | (rd << 25) | (rd << 14); // or %reg, $0, %reg + *(ub32 *)loc |= bits(sym.get_gottp_addr(ctx) + A - GOT, 9, 0); + } else { + u32 rd = bits(*(ub32 *)loc, 29, 25); + *(ub32 *)loc = 0x8018'2000 | (rd << 25) | (rd << 14); // xor %reg, $0, %reg + *(ub32 *)loc |= bits(S + A - ctx.tp_addr, 9, 0) | 0b1'1100'0000'0000; + } + break; + case R_SPARC_TLS_GD_ADD: + if (sym.has_tlsgd(ctx)) { + // do nothing + } else if (sym.has_gottp(ctx)) { + u32 rs2 = bits(*(ub32 *)loc, 4, 0); + *(ub32 *)loc = 0xd05d'c000 | rs2; // ldx [ %l7 + %reg ], %o0 + } else { + u32 rs2 = bits(*(ub32 *)loc, 4, 0); + *(ub32 *)loc = 0x9001'c000 | rs2; // add %g7, %reg, %o0 + } break; case R_SPARC_TLS_GD_CALL: - case R_SPARC_TLS_LDM_CALL: { - u64 addr; - if (ctx.arg.is_static) - addr = ctx.extra.tls_get_addr_sec->shdr.sh_addr; - else - addr = ctx.extra.tls_get_addr_sym->get_addr(ctx); - - *(ub32 *)loc |= bits(addr + A - P, 31, 2); + if (sym.has_tlsgd(ctx)) { + u64 addr = ctx.extra.tls_get_addr->get_addr(ctx); + *(ub32 *)loc |= bits(addr + A - P, 31, 2); + } else if (sym.has_gottp(ctx)) { + *(ub32 *)loc = 0x9001'c008; // add %g7, %o0, %o0 + } else { + *(ub32 *)loc = 0x0100'0000; // nop + } break; - } case R_SPARC_TLS_LDM_HI22: - *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10); + if (ctx.got->has_tlsld(ctx)) + *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 31, 10); + else + *(ub32 *)loc |= bits(ctx.tp_addr - ctx.tls_begin, 31, 10); break; case R_SPARC_TLS_LDM_LO10: - 
*(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0); + if (ctx.got->has_tlsld(ctx)) + *(ub32 *)loc |= bits(ctx.got->get_tlsld_addr(ctx) + A - GOT, 9, 0); + else + *(ub32 *)loc |= bits(ctx.tp_addr - ctx.tls_begin, 9, 0); + break; + case R_SPARC_TLS_LDM_ADD: + if (ctx.got->has_tlsld(ctx)) { + // do nothing + } else { + u32 rs2 = bits(*(ub32 *)loc, 4, 0); + *(ub32 *)loc = 0x9021'c000 | rs2; // sub %g7, %reg, %o0 + } + break; + case R_SPARC_TLS_LDM_CALL: + if (ctx.got->has_tlsld(ctx)) { + u64 addr = ctx.extra.tls_get_addr->get_addr(ctx); + *(ub32 *)loc |= bits(addr + A - P, 31, 2); + } else { + *(ub32 *)loc = 0x0100'0000; // nop + } break; case R_SPARC_TLS_LDO_HIX22: *(ub32 *)loc |= bits(S + A - ctx.dtp_addr, 31, 10); @@ -402,8 +442,7 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { case R_SPARC_SIZE32: *(ub32 *)loc = sym.esym().st_size + A; break; - case R_SPARC_TLS_GD_ADD: - case R_SPARC_TLS_LDM_ADD: + case R_SPARC_64: case R_SPARC_TLS_LDO_ADD: case R_SPARC_TLS_IE_LD: case R_SPARC_TLS_IE_LDX: @@ -471,8 +510,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -487,9 +524,6 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_GOT | NEEDS_PLT; switch (rel.r_type) { - case R_SPARC_64: - scan_dyn_absrel(ctx, sym, rel); - break; case R_SPARC_8: case R_SPARC_5: case R_SPARC_6: @@ -554,24 +588,36 @@ void InputSection::scan_relocations(Context &ctx) { scan_pcrel(ctx, sym, rel); break; case R_SPARC_TLS_GD_HI22: - sym.flags |= NEEDS_TLSGD; + if (ctx.arg.static_ || (ctx.arg.relax && sym.is_tprel_linktime_const(ctx))) { + // We always relax if -static because libc.a doesn't contain + // __tls_get_addr(). 
+ } else if (ctx.arg.relax && sym.is_tprel_runtime_const(ctx)) { + sym.flags |= NEEDS_GOTTP; + } else { + sym.flags |= NEEDS_TLSGD; + } break; case R_SPARC_TLS_LDM_HI22: - ctx.needs_tlsld = true; + if (ctx.arg.static_ || (ctx.arg.relax && !ctx.arg.shared)) { + // We always relax if -static because libc.a doesn't contain + // __tls_get_addr(). + } else { + ctx.needs_tlsld = true; + } break; case R_SPARC_TLS_IE_HI22: sym.flags |= NEEDS_GOTTP; break; case R_SPARC_TLS_GD_CALL: case R_SPARC_TLS_LDM_CALL: - if (!ctx.arg.is_static) - if (Symbol &sym = *ctx.extra.tls_get_addr_sym; sym.is_imported) - sym.flags |= NEEDS_PLT; + if (Symbol *sym = ctx.extra.tls_get_addr; sym->is_imported) + sym->flags |= NEEDS_PLT; break; case R_SPARC_TLS_LE_HIX22: case R_SPARC_TLS_LE_LOX10: check_tlsle(ctx, sym, rel); break; + case R_SPARC_64: case R_SPARC_GOTDATA_OP_LOX10: case R_SPARC_GOTDATA_OP: case R_SPARC_GOTDATA_LOX10: @@ -594,25 +640,4 @@ void InputSection::scan_relocations(Context &ctx) { } } -// __tls_get_addr is not defined by libc.a, so we can't use that function -// in statically-linked executables. This section provides a replacement. 
-void SparcTlsGetAddrSection::copy_buf(Context &ctx) { - ub32 *buf = (ub32 *)(ctx.buf + this->shdr.sh_offset); - - static const ub32 insn[] = { - 0x0300'0000, // sethi %hi(TP_SIZE), %g1 - 0x8210'6000, // or %g1, %lo(TP_SIZE), %g1 - 0x8221'c001, // sub %g7, %g1, %g1 - 0xd05a'2008, // ldx [ %o0 + 8 ], %o0 - 0x81c3'e008, // retl - 0x9000'4008, // add %g1, %o0, %o0 - }; - - assert(this->shdr.sh_size == sizeof(insn)); - memcpy(buf, insn, sizeof(insn)); - - buf[0] |= bits(ctx.tp_addr - ctx.tls_begin, 31, 10); - buf[1] |= bits(ctx.tp_addr - ctx.tls_begin, 9, 0); -} - -} // namespace mold::elf +} // namespace mold diff --git a/elf/arch-x86-64.cc b/src/arch-x86-64.cc similarity index 84% rename from elf/arch-x86-64.cc rename to src/arch-x86-64.cc index 9266b957..4e0b5f93 100644 --- a/elf/arch-x86-64.cc +++ b/src/arch-x86-64.cc @@ -28,7 +28,9 @@ #include "mold.h" -namespace mold::elf { +#include + +namespace mold { using E = X86_64; @@ -299,10 +301,10 @@ static void relax_gd_to_ie(u8 *loc, ElfRel rel, u64 val) { } // Rewrite a function call to __tls_get_addr to a cheaper instruction -// sequence. The difference from relax_gd_to_le is that we are -// materializing a Dynamic Thread Pointer for the current ELF module -// instead of an address for a particular thread-local variable. -static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 tls_size) { +// sequence. The difference from relax_gd_to_le is that we are materializing +// the address of the beginning of TLS block instead of an address of a +// particular thread-local variable. +static void relax_ld_to_le(u8 *loc, ElfRel rel, i64 tls_size) { switch (rel.r_type) { case R_X86_64_PLT32: case R_X86_64_PC32: { @@ -311,10 +313,9 @@ static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 tls_size) { // 48 8d 3d 00 00 00 00 lea foo@tlsld(%rip), %rdi // e8 00 00 00 00 call __tls_get_addr // - // The instructions are so short that we cannot rewrite them with - // "mov %fs:0, %rax" which is 9 bytes long. 
We use a shorter code - // sequence instead. Since "xor %eax, %eax" zero-clears %rax, the - // meaning is equivalent. + // Because the original instruction sequence is so short that we need a + // little bit of code golfing here. "mov %fs:0, %rax" is 9 byte long, so + // xor + mov is shorter. Note that `xor %eax, %eax` zero-clears %eax. static const u8 insn[] = { 0x31, 0xc0, // xor %eax, %eax 0x64, 0x48, 0x8b, 0x00, // mov %fs:(%rax), %rax @@ -331,13 +332,12 @@ static void relax_ld_to_le(u8 *loc, ElfRel rel, u64 tls_size) { // 48 8d 3d 00 00 00 00 lea foo@tlsld(%rip), %rdi // ff 15 00 00 00 00 call *__tls_get_addr@GOT(%rip) static const u8 insn[] = { - 0x31, 0xc0, // xor %eax, %eax + 0x48, 0x31, 0xc0, // xor %rax, %rax 0x64, 0x48, 0x8b, 0x00, // mov %fs:(%rax), %rax 0x48, 0x2d, 0, 0, 0, 0, // sub $tls_size, %rax - 0x90, // nop }; memcpy(loc - 3, insn, sizeof(insn)); - *(ul32 *)(loc + 5) = tls_size; + *(ul32 *)(loc + 6) = tls_size; break; } case R_X86_64_PLTOFF64: { @@ -368,11 +368,6 @@ template <> void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { std::span> rels = get_rels(ctx); - ElfRel *dynrel = nullptr; - if (ctx.reldyn) - dynrel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + - file.reldyn_offset + this->reldyn_offset); - for (i64 i = 0; i < rels.size(); i++) { const ElfRel &rel = rels[i]; if (rel.r_type == R_NONE) @@ -420,7 +415,6 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { write32s(S + A); break; case R_X86_64_64: - apply_dyn_absrel(ctx, sym, rel, loc, S, A, P, &dynrel); break; case R_X86_64_PC8: check(S + A - P, -(1 << 7), 1 << 7); @@ -536,14 +530,16 @@ void InputSection::apply_reloc_alloc(Context &ctx, u8 *base) { // call *(%rax) // R_X86_64_TLSDESC_CALL foo // - // We may relax the instructions to the following for non-dlopen'd DSO + // We may relax the instructions to the following if its TP-relative + // address is known at link-time // - // mov foo@GOTTPOFF(%rip), %rax + // mov $foo@TPOFF, %rax // nop // - // or to 
the following for executable. + // or to the following if the TP-relative address is known at + // process startup time. // - // mov $foo@TPOFF, %rax + // mov foo@GOTTPOFF(%rip), %rax // nop // // We allow the following alternative code sequence too because @@ -707,8 +703,6 @@ void InputSection::apply_reloc_nonalloc(Context &ctx, u8 *base) { template <> void InputSection::scan_relocations(Context &ctx) { assert(shdr().sh_flags & SHF_ALLOC); - - this->reldyn_offset = file.num_dynrel * sizeof(ElfRel); std::span> rels = get_rels(ctx); // Scan relocations @@ -743,9 +737,6 @@ void InputSection::scan_relocations(Context &ctx) { case R_X86_64_32S: scan_absrel(ctx, sym, rel); break; - case R_X86_64_64: - scan_dyn_absrel(ctx, sym, rel); - break; case R_X86_64_PC8: case R_X86_64_PC16: case R_X86_64_PC32: @@ -768,8 +759,7 @@ void InputSection::scan_relocations(Context &ctx) { sym.flags |= NEEDS_PLT; break; case R_X86_64_TLSGD: - if ((ctx.arg.relax && sym.is_tprel_linktime_const(ctx)) || - ctx.arg.is_static) { + if (ctx.arg.static_ || (ctx.arg.relax && sym.is_tprel_linktime_const(ctx))) { // We always relax if -static because libc.a doesn't contain // __tls_get_addr(). i++; @@ -783,7 +773,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_X86_64_TLSLD: // We always relax if -static because libc.a doesn't contain // __tls_get_addr(). - if (ctx.arg.is_static || (ctx.arg.relax && !ctx.arg.shared)) + if (ctx.arg.static_ || (ctx.arg.relax && !ctx.arg.shared)) i++; else ctx.needs_tlsld = true; @@ -803,6 +793,7 @@ void InputSection::scan_relocations(Context &ctx) { case R_X86_64_TPOFF64: check_tlsle(ctx, sym, rel); break; + case R_X86_64_64: case R_X86_64_GOTOFF64: case R_X86_64_DTPOFF32: case R_X86_64_DTPOFF64: @@ -816,4 +807,95 @@ void InputSection::scan_relocations(Context &ctx) { } } -} // namespace mold::elf +// Intel CET is a relatively new CPU feature to enhance security by +// protecting control flow integrity. 
If the feature is enabled, indirect +// branches (i.e. branch instructions that take a register instead of an +// immediate) must land on a "landing pad" instruction, or a CPU-level fault +// will be raised. That prevents an attacker from branching into the middle of a random +// function, making ROP or JOP much harder to conduct. +// +// On x86-64, the landing pad instruction is ENDBR64. That is actually a +// repurposed NOP instruction to provide binary compatibility with older +// hardware that doesn't support CET. +// +// The problem here is that the compiler always emits a landing pad at the +// beginning of a global function because it doesn't know whether or not the +// function's address is taken in other translation units. As a result, the +// resulting binary contains more landing pads than necessary. +// +// This function rewrites a landing pad with a nop if the function's address +// was not actually taken. We can do what the compiler cannot because we +// know about all translation units. +void rewrite_endbr(Context &ctx) { + Timer t(ctx, "rewrite_endbr"); + + constexpr u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa}; + constexpr u8 nop[] = {0x0f, 0x1f, 0x40, 0x00}; + + // Rewrite all endbr64 instructions referred to by function symbols with + // NOPs. We handle only global symbols because the compiler doesn't emit + // an endbr64 for a file-scoped function in the first place if its address + // is not taken within the file.
+ tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->get_global_syms()) { + if (sym->file == file && sym->esym().st_type == STT_FUNC) { + if (InputSection *isec = sym->get_input_section(); + isec && (isec->shdr().sh_flags & SHF_EXECINSTR)) { + if (OutputSection *osec = isec->output_section) { + u8 *buf = ctx.buf + osec->shdr.sh_offset + isec->offset + sym->value; + if (memcmp(buf, endbr64, 4) == 0) + memcpy(buf, nop, 4); + } + } + } + } + }); + + auto write_back = [&](InputSection *isec, i64 offset) { + // If isec has an endbr64 at a given offset, copy that instruction to + // the output buffer, possibly overwriting a nop written in the above + // loop. We test offset + 4 <= size because size - 4 wraps if size < 4. + if (isec && isec->output_section && + (isec->shdr().sh_flags & SHF_EXECINSTR) && + 0 <= offset && (u64)offset + 4 <= isec->contents.size() && + memcmp(isec->contents.data() + offset, endbr64, 4) == 0) + memcpy(ctx.buf + isec->output_section->shdr.sh_offset + isec->offset + offset, + endbr64, 4); + }; + + // Write back endbr64 instructions if they are referred to by address-taking + // relocations. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) { + if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) { + for (const ElfRel &rel : isec->get_rels(ctx)) { + if (!is_func_call_rel(rel)) { + Symbol *sym = file->symbols[rel.r_sym]; + if (sym->esym().st_type == STT_SECTION) + write_back(sym->get_input_section(), rel.r_addend); + else + write_back(sym->get_input_section(), sym->value); + } + } + } + } + }); + + // We record addresses of some symbols in the ELF header, .dynamic or in + // .dynsym. We need to retain endbr64s for such symbols.
+ auto keep = [&](Symbol *sym) { + if (sym) + write_back(sym->get_input_section(), sym->value); + }; + + keep(ctx.arg.entry); + keep(ctx.arg.init); + keep(ctx.arg.fini); + + if (ctx.dynsym) + for (Symbol *sym : ctx.dynsym->symbols) + if (sym && sym->is_exported) + keep(sym); +} + +} // namespace mold diff --git a/elf/cmdline.cc b/src/cmdline.cc similarity index 92% rename from elf/cmdline.cc rename to src/cmdline.cc index e053f090..bdb79ed9 100644 --- a/elf/cmdline.cc +++ b/src/cmdline.cc @@ -15,7 +15,7 @@ # define STDERR_FILENO (_fileno(stderr)) #endif -namespace mold::elf { +namespace mold { inline const char helpmsg[] = R"( Options: @@ -44,7 +44,8 @@ inline const char helpmsg[] = R"( -f SHLIB, --auxiliary SHLIB Set DT_AUXILIARY to the specified value -h LIBNAME, --soname LIBNAME Set shared library name - -l LIBNAME Search for a given library + -l LIBNAME, --library LIBNAME + Search for a given library -m TARGET Set target -o FILE, --output FILE Set output filename -q, --emit-relocs Leaves relocation sections in the output @@ -71,7 +72,7 @@ inline const char helpmsg[] = R"( --no-apply-dynamic-relocs --as-needed Only set DT_NEEDED if used --no-as-needed - --build-id [none,md5,sha1,sha256,uuid,HEXSTRING] + --build-id [none,md5,sha1,sha256,fast,uuid,HEXSTRING] Generate build ID --no-build-id --chroot DIR Set a given path to the root directory @@ -85,11 +86,14 @@ inline const char helpmsg[] = R"( --defsym=SYMBOL=VALUE Define a symbol alias --demangle Demangle C++ symbols in log messages (default) --no-demangle + --detach Create separate debug info file in the background (default) + --no-detach --enable-new-dtags Emit DT_RUNPATH for --rpath (default) --disable-new-dtags Emit DT_RPATH for --rpath --execute-only Make executable segments unreadable --dp Ignored --dynamic-list=FILE Read a list of dynamic symbols (implies -Bsymbolic) + --dynamic-list-data Add data symbols to dynamic symbols --eh-frame-hdr Create .eh_frame_hdr section --no-eh-frame-hdr --exclude-libs 
LIB,LIB,.. Mark all symbols in given libraries as hidden @@ -143,6 +147,8 @@ inline const char helpmsg[] = R"( --rpath-link DIR Ignored --run COMMAND ARG... Run COMMAND with mold as /usr/bin/ld --section-start=SECTION=ADDR Set address for section + --separate-debug-file[=FILE] Separate debug info to the specified file + --no-separate-debug-file --shared, --Bshareable Create a shared library --shuffle-sections[=SEED] Randomize the output by shuffling input sections --sort-common Ignored @@ -209,13 +215,15 @@ inline const char helpmsg[] = R"( -z stack-size=VALUE Set the size of the stack segment -z relro Make some sections read-only after relocation (default) -z norelro + -z rewrite-endbr Rewrite indirect branch target instructions with NOPs + -z norewrite-endbr -z rodynamic Make the .dynamic section read-only -z text Report error if DT_TEXTREL is set -z notext -z textoff -mold: supported targets: elf32-i386 elf64-x86-64 elf32-littlearm elf64-littleaarch64 elf32-littleriscv elf32-bigriscv elf64-littleriscv elf64-bigriscv elf32-powerpc elf64-powerpc elf64-powerpc elf64-powerpcle elf64-s390 elf64-sparc elf32-m68k elf32-sh-linux elf64-alpha elf64-loongarch elf32-loongarch -mold: supported emulations: elf_i386 elf_x86_64 armelf_linux_eabi aarch64linux aarch64elf elf32lriscv elf32briscv elf64lriscv elf64briscv elf32ppc elf32ppclinux elf64ppc elf64lppc elf64_s390 elf64_sparc m68kelf shlelf_linux elf64alpha elf64loongarch elf32loongarch)"; +mold: supported targets: elf32-i386 elf64-x86-64 elf32-littlearm elf64-littleaarch64 elf32-littleriscv elf32-bigriscv elf64-littleriscv elf64-bigriscv elf32-powerpc elf64-powerpc elf64-powerpc elf64-powerpcle elf64-s390 elf64-sparc elf32-m68k elf32-sh-linux elf64-loongarch elf32-loongarch +mold: supported emulations: elf_i386 elf_x86_64 armelf_linux_eabi aarch64linux aarch64elf elf32lriscv elf32briscv elf64lriscv elf64briscv elf32ppc elf32ppclinux elf64ppc elf64lppc elf64_s390 elf64_sparc m68kelf shlelf_linux elf64loongarch 
elf32loongarch)"; template static std::vector @@ -365,6 +373,15 @@ static i64 parse_number(Context &ctx, std::string opt, return ret; } +static char from_hex(char c) { + if ('0' <= c && c <= '9') + return c - '0'; + if ('a' <= c && c <= 'f') + return c - 'a' + 10; + assert('A' <= c && c <= 'F'); + return c - 'A' + 10; +} + template static std::vector parse_hex_build_id(Context &ctx, std::string_view arg) { auto flags = std::regex_constants::optimize | std::regex_constants::ECMAScript; @@ -373,23 +390,34 @@ static std::vector parse_hex_build_id(Context &ctx, std::string_view arg) if (!std::regex_match(arg.begin(), arg.end(), re)) Fatal(ctx) << "invalid build-id: " << arg; - arg = arg.substr(2); - - auto fn = [](char c) { - if ('0' <= c && c <= '9') - return c - '0'; - if ('a' <= c && c <= 'f') - return c - 'a' + 10; - assert('A' <= c && c <= 'F'); - return c - 'A' + 10; - }; - std::vector vec; - for (i64 i = 0; i < arg.size(); i += 2) - vec.push_back((fn(arg[i]) << 4) | fn(arg[i + 1])); + for (i64 i = 2; i < arg.size(); i += 2) + vec.push_back((from_hex(arg[i]) << 4) | from_hex(arg[i + 1])); return vec; } +template +static std::string +parse_encoded_package_metadata(Context &ctx, std::string_view arg) { + auto flags = std::regex_constants::optimize | std::regex_constants::ECMAScript; + static std::regex re(R"(([^%]|%[0-9a-fA-F][0-9a-fA-F])*)", flags); + + if (!std::regex_match(arg.begin(), arg.end(), re)) + Fatal(ctx) << "--encoded-package-metadata: invalid string: " << arg; + + std::ostringstream out; + while (!arg.empty()) { + if (arg[0] == '%') { + out << (char)((from_hex(arg[1]) << 4) | from_hex(arg[2])); + arg = arg.substr(3); + } else { + out << arg[0]; + arg = arg.substr(1); + } + } + return out.str(); +} + static std::vector split_by_comma_or_colon(std::string_view str) { std::vector vec; @@ -401,7 +429,7 @@ split_by_comma_or_colon(std::string_view str) { break; } vec.push_back(str.substr(0, pos)); - str = str.substr(pos); + str = str.substr(pos + 1); } 
return vec; } @@ -410,8 +438,7 @@ template static void read_retain_symbols_file(Context &ctx, std::string_view path) { MappedFile *mf = must_open_file(ctx, std::string(path)); std::string_view data((char *)mf->data, mf->size); - - ctx.arg.retain_symbols_file.reset(new std::unordered_set); + std::vector *> vec; while (!data.empty()) { size_t pos = data.find('\n'); @@ -427,8 +454,10 @@ static void read_retain_symbols_file(Context &ctx, std::string_view path) { name = string_trim(name); if (!name.empty()) - ctx.arg.retain_symbols_file->insert(name); + vec.push_back(get_symbol(ctx, name)); } + + ctx.arg.retain_symbols_file = std::move(vec); } static bool is_file(std::string_view path) { @@ -526,8 +555,10 @@ std::vector parse_nonpositional_args(Context &ctx) { std::optional z_separate_code; std::optional report_undefined; std::optional z_relro; + std::optional separate_debug_file; std::optional shuffle_sections_seed; std::unordered_set rpaths; + std::vector version_scripts; auto add_rpath = [&](std::string_view arg) { if (rpaths.insert(arg).second) { @@ -537,9 +568,9 @@ std::vector parse_nonpositional_args(Context &ctx) { } }; - // RISC-V object files contains lots of local symbols, so by default - // we discard them. This is compatible with GNU ld. - if constexpr (is_riscv) + // RISC-V and LoongArch object files contains lots of local symbols, + // so by default we discard them. This is compatible with GNU ld. + if constexpr (is_riscv || is_loongarch) ctx.arg.discard_locals = true; // We generally don't need to write addends to relocated places if the @@ -554,8 +585,7 @@ std::vector parse_nonpositional_args(Context &ctx) { // // - Static PIE binaries crash on startup in some RISC-V environment if // we write addends to relocated places. 
- if constexpr (is_sparc || is_riscv) - ctx.arg.apply_dynamic_relocs = false; + ctx.arg.apply_dynamic_relocs = !is_sparc && !is_riscv; auto read_arg = [&](std::string name) { for (const std::string &opt : add_dashes(name)) { @@ -652,7 +682,7 @@ std::vector parse_nonpositional_args(Context &ctx) { << " elf64briscv\n elf32lriscv\n elf32briscv\n" << " elf32ppc\n elf64ppc\n elf64lppc\n elf64_s390\n" << " elf64_sparc\n m68kelf\n shlelf_linux\n" - << " elf64alpha\n elf64loongarch\n elf32loongarch"; + << " elf64loongarch\n elf32loongarch"; version_shown = true; } else if (read_arg("m")) { if (arg == "elf_x86_64") { @@ -685,8 +715,6 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.emulation = M68K::target_name; } else if (arg == "shlelf_linux") { ctx.arg.emulation = SH4::target_name; - } else if (arg == "elf64alpha") { - ctx.arg.emulation = ALPHA::target_name; } else if (arg == "elf64loongarch") { ctx.arg.emulation = LOONGARCH64::target_name; } else if (arg == "elf32loongarch") { @@ -725,10 +753,10 @@ std::vector parse_nonpositional_args(Context &ctx) { } else if (read_flag("print-map") || read_flag("M")) { ctx.arg.print_map = true; } else if (read_flag("Bstatic") || read_flag("dn") || read_flag("static")) { - ctx.arg.is_static = true; + ctx.arg.static_ = true; remaining.push_back("--Bstatic"); } else if (read_flag("Bdynamic") || read_flag("dy")) { - ctx.arg.is_static = false; + ctx.arg.static_ = false; remaining.push_back("--Bdynamic"); } else if (read_flag("shared") || read_flag("Bshareable")) { ctx.arg.shared = true; @@ -757,6 +785,10 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.demangle = true; } else if (read_flag("no-demangle")) { ctx.arg.demangle = false; + } else if (read_flag("detach")) { + ctx.arg.detach = true; + } else if (read_flag("no-detach")) { + ctx.arg.detach = false; } else if (read_flag("default-symver")) { ctx.arg.default_symver = true; } else if (read_flag("noinhibit-exec")) { @@ -863,6 +895,8 @@ std::vector 
parse_nonpositional_args(Context &ctx) { } else if (read_flag("pack-dyn-relocs=none") || read_z_flag("nopack-relative-relocs")) { ctx.arg.pack_dyn_relocs_relr = false; + } else if (read_arg("encoded-package-metadata")) { + ctx.arg.package_metadata = parse_encoded_package_metadata(ctx, arg); } else if (read_arg("package-metadata")) { ctx.arg.package_metadata = arg; } else if (read_flag("stats")) { @@ -908,7 +942,7 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.wrap.insert(arg); } else if (read_flag("omagic") || read_flag("N")) { ctx.arg.omagic = true; - ctx.arg.is_static = true; + ctx.arg.static_ = true; } else if (read_flag("no-omagic")) { ctx.arg.omagic = false; } else if (read_arg("oformat")) { @@ -1004,6 +1038,12 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.z_origin = true; } else if (read_z_flag("nodefaultlib")) { ctx.arg.z_nodefaultlib = true; + } else if (read_eq("separate-debug-file")) { + separate_debug_file = arg; + } else if (read_flag("separate-debug-file")) { + separate_debug_file = ""; + } else if (read_flag("no-separate-debug-file")) { + separate_debug_file.reset(); } else if (read_z_flag("separate-loadable-segments")) { z_separate_code = SEPARATE_LOADABLE_SEGMENTS; } else if (read_z_flag("separate-code")) { @@ -1020,10 +1060,20 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.z_sectionheader = true; } else if (read_z_flag("nosectionheader")) { ctx.arg.z_sectionheader = false; - } else if (read_z_flag("rewrite-endbr")) { - ctx.arg.z_rewrite_endbr = true; } else if (read_z_flag("rodynamic")) { ctx.arg.z_rodynamic = true; + } else if (read_z_flag("x86-64-v2")) { + ctx.arg.z_x86_64_isa_level |= GNU_PROPERTY_X86_ISA_1_V2; + } else if (read_z_flag("x86-64-v3")) { + ctx.arg.z_x86_64_isa_level |= GNU_PROPERTY_X86_ISA_1_V3; + } else if (read_z_flag("x86-64-v4")) { + ctx.arg.z_x86_64_isa_level |= GNU_PROPERTY_X86_ISA_1_V4; + } else if (read_z_flag("rewrite-endbr")) { + if constexpr (!is_x86_64) + Fatal(ctx) << 
"-z rewrite-endbr is supported only on x86-64"; + ctx.arg.z_rewrite_endbr = true; + } else if (read_z_flag("norewrite-endbr")) { + ctx.arg.z_rewrite_endbr = false; } else if (read_flag("nmagic")) { ctx.arg.nmagic = true; } else if (read_flag("no-nmagic")) { @@ -1181,7 +1231,7 @@ std::vector parse_nonpositional_args(Context &ctx) { } else if (arg == "sha1") { ctx.arg.build_id.kind = BuildId::HASH; ctx.arg.build_id.hash_size = 20; - } else if (arg == "sha256") { + } else if (arg == "sha256" || arg == "fast") { ctx.arg.build_id.kind = BuildId::HASH; ctx.arg.build_id.hash_size = 32; } else if (arg.starts_with("0x") || arg.starts_with("0X")) { @@ -1203,6 +1253,10 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.arg.auxiliary.push_back(arg); } else if (read_arg("filter") || read_arg("F")) { ctx.arg.filter.push_back(arg); + } else if (read_flag("allow-shlib-undefined")) { + ctx.arg.allow_shlib_undefined = true; + } else if (read_flag("no-allow-shlib-undefined")) { + ctx.arg.allow_shlib_undefined = false; } else if (read_arg("O")) { } else if (read_flag("EB")) { } else if (read_flag("EL")) { @@ -1220,8 +1274,6 @@ std::vector parse_nonpositional_args(Context &ctx) { } else if (read_flag("enable-new-dtags")) { } else if (read_flag("disable-new-dtags")) { } else if (read_flag("nostdlib")) { - } else if (read_flag("allow-shlib-undefined")) { - } else if (read_flag("no-allow-shlib-undefined")) { } else if (read_flag("no-add-needed")) { } else if (read_flag("no-call-graph-profile-sort")) { } else if (read_flag("no-copy-dt-needed-entries")) { @@ -1246,17 +1298,12 @@ std::vector parse_nonpositional_args(Context &ctx) { } else if (read_flag("no-keep-memory")) { } else if (read_arg("max-cache-size")) { } else if (read_arg("version-script")) { - // --version-script is treated as positional arguments even though - // they are actually not positional. 
This is because linker scripts - // (a positional argument) can also specify a version script, and - // it's better to consolidate parsing in read_input_files. In - // particular, version scripts can modify ctx.default_version which - // we initialize *after* parsing non-positional args, so the parsing - // cannot be done right here. - remaining.push_back("--version-script=" + std::string(arg)); + version_scripts.push_back(arg); } else if (read_arg("dynamic-list")) { ctx.arg.Bsymbolic = BSYMBOLIC_ALL; append(ctx.dynamic_list_patterns, parse_dynamic_list(ctx, arg)); + } else if (read_flag("dynamic-list-data")) { + ctx.arg.dynamic_list_data = true; } else if (read_arg("export-dynamic-symbol")) { ctx.dynamic_list_patterns.push_back({arg, ""}); } else if (read_arg("export-dynamic-symbol-list")) { @@ -1269,7 +1316,7 @@ std::vector parse_nonpositional_args(Context &ctx) { remaining.push_back("--whole-archive"); } else if (read_flag("no-whole-archive")) { remaining.push_back("--no-whole-archive"); - } else if (read_arg("l")) { + } else if (read_arg("l") || read_arg("library")) { remaining.push_back("-l" + std::string(arg)); } else if (read_arg("script") || read_arg("T")) { remaining.push_back(std::string(arg)); @@ -1287,7 +1334,7 @@ std::vector parse_nonpositional_args(Context &ctx) { Fatal(ctx) << "unknown command line option: -dynamic; -dynamic is a " << "macOS linker's option.
mold does not support macOS."; } else { - if (args[0][0] == '-') + if (args[0].starts_with('-')) Fatal(ctx) << "unknown command line option: " << args[0]; remaining.push_back(std::string(args[0])); args = args.subspan(1); @@ -1332,7 +1379,7 @@ std::vector parse_nonpositional_args(Context &ctx) { } if (ctx.arg.relocatable) - ctx.arg.is_static = true; + ctx.arg.static_ = true; if (ctx.arg.shuffle_sections == SHUFFLE_SECTIONS_SHUFFLE) { if (shuffle_sections_seed) @@ -1395,9 +1442,35 @@ std::vector parse_nonpositional_args(Context &ctx) { ctx.default_version = VER_NDX_LAST_RESERVED + 1; } + for (std::string_view path : version_scripts) { + auto open = [&] { + if (MappedFile *mf = open_file(ctx, std::string(path))) + return mf; + for (std::string_view dir : ctx.arg.library_paths) + if (MappedFile *mf = + open_file(ctx, std::string(dir) + "/" + std::string(path))) + return mf; + Fatal(ctx) << "--version-script: file not found: " << path; + }; + + ReaderContext rctx; + Script(ctx, rctx, open()).parse_version_script(); + } + + if (separate_debug_file) { + if (separate_debug_file->empty()) + ctx.arg.separate_debug_file = ctx.arg.output + ".dbg"; + else + ctx.arg.separate_debug_file = *separate_debug_file; + } + if (ctx.arg.shared && warn_shared_textrel) ctx.arg.warn_textrel = true; + // We don't want the background process to write to stdout + if (ctx.arg.stats || ctx.arg.perf) + ctx.arg.detach = false; + ctx.arg.undefined.push_back(ctx.arg.entry); for (i64 i = 0; i < ctx.arg.defsyms.size(); i++) { @@ -1450,4 +1523,4 @@ using E = MOLD_TARGET; template std::vector expand_response_files(Context &, char **); template std::vector parse_nonpositional_args(Context &ctx); -} // namespace mold::elf +} // namespace mold diff --git a/elf/config.cc b/src/config.cc similarity index 85% rename from elf/config.cc rename to src/config.cc index 55db9603..af578ab8 100644 --- a/elf/config.cc +++ b/src/config.cc @@ -1,7 +1,7 @@ #include "mold.h" #include "config.h" -namespace mold::elf { 
+namespace mold { std::string get_mold_version() { if (mold_git_hash.empty()) @@ -10,4 +10,4 @@ std::string get_mold_version() { "; compatible with GNU ld)"; } -} // namespace mold::elf +} // namespace mold diff --git a/elf/elf.cc b/src/elf.cc similarity index 96% rename from elf/elf.cc rename to src/elf.cc index 2ce2ec47..8f78df67 100644 --- a/elf/elf.cc +++ b/src/elf.cc @@ -1,6 +1,6 @@ -#include "mold.h" +#include "elf.h" -namespace mold::elf { +namespace mold { static std::string unknown_type(u32 r_type) { char buf[50]; @@ -890,46 +890,6 @@ std::string rel_to_string(u32 r_type) { return unknown_type(r_type); } -template <> -std::string rel_to_string(u32 r_type) { - switch (r_type) { - CASE(R_ALPHA_NONE); - CASE(R_ALPHA_REFLONG); - CASE(R_ALPHA_REFQUAD); - CASE(R_ALPHA_GPREL32); - CASE(R_ALPHA_LITERAL); - CASE(R_ALPHA_LITUSE); - CASE(R_ALPHA_GPDISP); - CASE(R_ALPHA_BRADDR); - CASE(R_ALPHA_HINT); - CASE(R_ALPHA_SREL16); - CASE(R_ALPHA_SREL32); - CASE(R_ALPHA_SREL64); - CASE(R_ALPHA_GPRELHIGH); - CASE(R_ALPHA_GPRELLOW); - CASE(R_ALPHA_GPREL16); - CASE(R_ALPHA_COPY); - CASE(R_ALPHA_GLOB_DAT); - CASE(R_ALPHA_JMP_SLOT); - CASE(R_ALPHA_RELATIVE); - CASE(R_ALPHA_BRSGP); - CASE(R_ALPHA_TLSGD); - CASE(R_ALPHA_TLSLDM); - CASE(R_ALPHA_DTPMOD64); - CASE(R_ALPHA_GOTDTPREL); - CASE(R_ALPHA_DTPREL64); - CASE(R_ALPHA_DTPRELHI); - CASE(R_ALPHA_DTPRELLO); - CASE(R_ALPHA_DTPREL16); - CASE(R_ALPHA_GOTTPREL); - CASE(R_ALPHA_TPREL64); - CASE(R_ALPHA_TPRELHI); - CASE(R_ALPHA_TPRELLO); - CASE(R_ALPHA_TPREL16); - } - return unknown_type(r_type); -} - template <> std::string rel_to_string(u32 r_type) { switch (r_type) { @@ -946,6 +906,8 @@ std::string rel_to_string(u32 r_type) { CASE(R_LARCH_TLS_TPREL32); CASE(R_LARCH_TLS_TPREL64); CASE(R_LARCH_IRELATIVE); + CASE(R_LARCH_TLS_DESC32); + CASE(R_LARCH_TLS_DESC64); CASE(R_LARCH_MARK_LA); CASE(R_LARCH_MARK_PCREL); CASE(R_LARCH_SOP_PUSH_PCREL); @@ -1031,6 +993,23 @@ std::string rel_to_string(u32 r_type) { CASE(R_LARCH_ADD_ULEB128); 
CASE(R_LARCH_SUB_ULEB128); CASE(R_LARCH_64_PCREL); + CASE(R_LARCH_CALL36); + CASE(R_LARCH_TLS_DESC_PC_HI20); + CASE(R_LARCH_TLS_DESC_PC_LO12); + CASE(R_LARCH_TLS_DESC64_PC_LO20); + CASE(R_LARCH_TLS_DESC64_PC_HI12); + CASE(R_LARCH_TLS_DESC_HI20); + CASE(R_LARCH_TLS_DESC_LO12); + CASE(R_LARCH_TLS_DESC64_LO20); + CASE(R_LARCH_TLS_DESC64_HI12); + CASE(R_LARCH_TLS_DESC_LD); + CASE(R_LARCH_TLS_DESC_CALL); + CASE(R_LARCH_TLS_LE_HI20_R); + CASE(R_LARCH_TLS_LE_ADD_R); + CASE(R_LARCH_TLS_LE_LO12_R); + CASE(R_LARCH_TLS_LD_PCREL20_S2); + CASE(R_LARCH_TLS_GD_PCREL20_S2); + CASE(R_LARCH_TLS_DESC_PCREL20_S2); } return unknown_type(r_type); } @@ -1040,4 +1019,4 @@ std::string rel_to_string(u32 r_type) { return rel_to_string(r_type); } -} // namespace mold::elf +} // namespace mold diff --git a/elf/elf.h b/src/elf.h similarity index 95% rename from elf/elf.h rename to src/elf.h index c58fea05..08ca6db2 100644 --- a/elf/elf.h +++ b/src/elf.h @@ -1,13 +1,13 @@ #pragma once -#include "../common/integers.h" +#include "../lib/integers.h" #include #include #include #include -namespace mold::elf { +namespace mold { struct X86_64; struct I386; @@ -24,7 +24,6 @@ struct S390X; struct SPARC64; struct M68K; struct SH4; -struct ALPHA; struct LOONGARCH64; struct LOONGARCH32; @@ -191,6 +190,7 @@ enum : u32 { PT_GNU_EH_FRAME = 0x6474e550, PT_GNU_STACK = 0x6474e551, PT_GNU_RELRO = 0x6474e552, + PT_GNU_PROPERTY = 0x6474e553, PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, PT_ARM_EXIDX = 0x70000001, PT_RISCV_ATTRIBUTES = 0x70000003, @@ -238,7 +238,6 @@ enum : u32 { EM_AARCH64 = 183, EM_RISCV = 243, EM_LOONGARCH = 258, - EM_ALPHA = 0x9026, }; enum : u32 { @@ -342,6 +341,12 @@ enum : u32 { GNU_PROPERTY_X86_FEATURE_1_IBT = 1, GNU_PROPERTY_X86_FEATURE_1_SHSTK = 2, GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002, + + GNU_PROPERTY_X86_ISA_1_NEEDED = 0xc0008002, + GNU_PROPERTY_X86_ISA_1_BASELINE = 1, + GNU_PROPERTY_X86_ISA_1_V2 = 2, + GNU_PROPERTY_X86_ISA_1_V3 = 4, + GNU_PROPERTY_X86_ISA_1_V4 = 8, }; enum : u32 { @@ -379,8 
+384,6 @@ enum : u32 { enum : u32 { STO_RISCV_VARIANT_CC = 0x80, - STO_ALPHA_NOPV = 0x20, - STO_ALPHA_STD_GPLOAD = 0x22, }; enum : u32 { @@ -1227,42 +1230,6 @@ enum : u32 { R_SH_GOTPLT32 = 168, }; -enum : u32 { - R_ALPHA_NONE = 0, - R_ALPHA_REFLONG = 1, - R_ALPHA_REFQUAD = 2, - R_ALPHA_GPREL32 = 3, - R_ALPHA_LITERAL = 4, - R_ALPHA_LITUSE = 5, - R_ALPHA_GPDISP = 6, - R_ALPHA_BRADDR = 7, - R_ALPHA_HINT = 8, - R_ALPHA_SREL16 = 9, - R_ALPHA_SREL32 = 10, - R_ALPHA_SREL64 = 11, - R_ALPHA_GPRELHIGH = 17, - R_ALPHA_GPRELLOW = 18, - R_ALPHA_GPREL16 = 19, - R_ALPHA_COPY = 24, - R_ALPHA_GLOB_DAT = 25, - R_ALPHA_JMP_SLOT = 26, - R_ALPHA_RELATIVE = 27, - R_ALPHA_BRSGP = 28, - R_ALPHA_TLSGD = 29, - R_ALPHA_TLSLDM = 30, - R_ALPHA_DTPMOD64 = 31, - R_ALPHA_GOTDTPREL = 32, - R_ALPHA_DTPREL64 = 33, - R_ALPHA_DTPRELHI = 34, - R_ALPHA_DTPRELLO = 35, - R_ALPHA_DTPREL16 = 36, - R_ALPHA_GOTTPREL = 37, - R_ALPHA_TPREL64 = 38, - R_ALPHA_TPRELHI = 39, - R_ALPHA_TPRELLO = 40, - R_ALPHA_TPREL16 = 41, -}; - enum : u32 { R_LARCH_NONE = 0, R_LARCH_32 = 1, @@ -1277,6 +1244,8 @@ enum : u32 { R_LARCH_TLS_TPREL32 = 10, R_LARCH_TLS_TPREL64 = 11, R_LARCH_IRELATIVE = 12, + R_LARCH_TLS_DESC32 = 13, + R_LARCH_TLS_DESC64 = 14, R_LARCH_MARK_LA = 20, R_LARCH_MARK_PCREL = 21, R_LARCH_SOP_PUSH_PCREL = 22, @@ -1362,6 +1331,23 @@ enum : u32 { R_LARCH_ADD_ULEB128 = 107, R_LARCH_SUB_ULEB128 = 108, R_LARCH_64_PCREL = 109, + R_LARCH_CALL36 = 110, + R_LARCH_TLS_DESC_PC_HI20 = 111, + R_LARCH_TLS_DESC_PC_LO12 = 112, + R_LARCH_TLS_DESC64_PC_LO20 = 113, + R_LARCH_TLS_DESC64_PC_HI12 = 114, + R_LARCH_TLS_DESC_HI20 = 115, + R_LARCH_TLS_DESC_LO12 = 116, + R_LARCH_TLS_DESC64_LO20 = 117, + R_LARCH_TLS_DESC64_HI12 = 118, + R_LARCH_TLS_DESC_LD = 119, + R_LARCH_TLS_DESC_CALL = 120, + R_LARCH_TLS_LE_HI20_R = 121, + R_LARCH_TLS_LE_ADD_R = 122, + R_LARCH_TLS_LE_LO12_R = 123, + R_LARCH_TLS_LD_PCREL20_S2 = 124, + R_LARCH_TLS_GD_PCREL20_S2 = 125, + R_LARCH_TLS_DESC_PCREL20_S2 = 126, }; // @@ -1786,33 +1772,6 @@ struct ElfSym { ul64 
st_size; }; -template <> -struct ElfSym { - bool is_undef() const { return st_shndx == SHN_UNDEF; } - bool is_abs() const { return st_shndx == SHN_ABS; } - bool is_common() const { return st_shndx == SHN_COMMON; } - bool is_weak() const { return st_bind == STB_WEAK; } - bool is_undef_weak() const { return is_undef() && is_weak(); } - - ul32 st_name; - -#ifdef __LITTLE_ENDIAN__ - u8 st_type : 4; - u8 st_bind : 4; - u8 st_visibility : 2; - u8 alpha_st_other : 6; // contains STO_ALPHA_NOPV, STO_ALPHA_STD_GPLOAD or 0 -#else - u8 st_bind : 4; - u8 st_type : 4; - u8 alpha_st_other : 6; - u8 st_visibility : 2; -#endif - - ul16 st_shndx; - ul64 st_value; - ul64 st_size; -}; - template <> struct ElfRel { ElfRel() = default; @@ -1831,7 +1790,7 @@ template <> struct ElfRel { ElfRel() = default; - // Addend is ignored except for base relocations because even though + // Addend is ignored except for base relocations because even though // SH4 is RELA, r_addend is ignored in most cases and works as if it // were REL. 
ElfRel(u64 offset, u32 type, u32 sym, i64 addend) @@ -1866,7 +1825,6 @@ template concept is_s390x = std::same_as; template concept is_sparc64 = std::same_as; template concept is_m68k = std::same_as; template concept is_sh4 = std::same_as; -template concept is_alpha = std::same_as; template concept is_loongarch64 = std::same_as; template concept is_loongarch32 = std::same_as; @@ -2215,29 +2173,6 @@ struct SH4 { static constexpr u32 R_FUNCALL[] = { R_SH_PLT32 }; }; -struct ALPHA { - static constexpr std::string_view target_name = "alpha"; - static constexpr bool is_64 = true; - static constexpr bool is_le = true; - static constexpr bool is_rela = true; - static constexpr u32 page_size = 65536; - static constexpr u32 e_machine = EM_ALPHA; - static constexpr u32 plt_hdr_size = 0; - static constexpr u32 plt_size = 0; - static constexpr u32 pltgot_size = 0; - static constexpr u8 filler[] = { 0x81, 0x00, 0x00, 0x00 }; // bugchk - - static constexpr u32 R_COPY = R_ALPHA_COPY; - static constexpr u32 R_GLOB_DAT = R_ALPHA_GLOB_DAT; - static constexpr u32 R_JUMP_SLOT = R_ALPHA_JMP_SLOT; - static constexpr u32 R_ABS = R_ALPHA_REFQUAD; - static constexpr u32 R_RELATIVE = R_ALPHA_RELATIVE; - static constexpr u32 R_DTPOFF = R_ALPHA_DTPREL64; - static constexpr u32 R_TPOFF = R_ALPHA_TPREL64; - static constexpr u32 R_DTPMOD = R_ALPHA_DTPMOD64; - static constexpr u32 R_FUNCALL[] = {}; -}; - struct LOONGARCH64 { static constexpr std::string_view target_name = "loongarch64"; static constexpr bool is_64 = true; @@ -2248,8 +2183,6 @@ struct LOONGARCH64 { static constexpr u32 plt_hdr_size = 32; static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; - static constexpr u32 thunk_hdr_size = 0; - static constexpr u32 thunk_size = 8; static constexpr u8 filler[] = { 0x00, 0x00, 0x2a, 0x00 }; // break 0 static constexpr u32 R_COPY = R_LARCH_COPY; @@ -2261,7 +2194,8 @@ struct LOONGARCH64 { static constexpr u32 R_DTPOFF = R_LARCH_TLS_DTPREL64; static constexpr u32 R_TPOFF = 
R_LARCH_TLS_TPREL64; static constexpr u32 R_DTPMOD = R_LARCH_TLS_DTPMOD64; - static constexpr u32 R_FUNCALL[] = { R_LARCH_B26 }; + static constexpr u32 R_TLSDESC = R_LARCH_TLS_DESC64; + static constexpr u32 R_FUNCALL[] = { R_LARCH_B26, R_LARCH_CALL36 }; }; struct LOONGARCH32 { @@ -2274,8 +2208,6 @@ struct LOONGARCH32 { static constexpr u32 plt_hdr_size = 32; static constexpr u32 plt_size = 16; static constexpr u32 pltgot_size = 16; - static constexpr u32 thunk_hdr_size = 0; - static constexpr u32 thunk_size = 8; static constexpr u8 filler[] = { 0x00, 0x00, 0x2a, 0x00 }; // break 0 static constexpr u32 R_COPY = R_LARCH_COPY; @@ -2287,7 +2219,8 @@ struct LOONGARCH32 { static constexpr u32 R_DTPOFF = R_LARCH_TLS_DTPREL32; static constexpr u32 R_TPOFF = R_LARCH_TLS_TPREL32; static constexpr u32 R_DTPMOD = R_LARCH_TLS_DTPMOD32; - static constexpr u32 R_FUNCALL[] = { R_LARCH_B26 }; + static constexpr u32 R_TLSDESC = R_LARCH_TLS_DESC32; + static constexpr u32 R_FUNCALL[] = { R_LARCH_B26, R_LARCH_CALL36 }; }; -} // namespace mold::elf +} // namespace mold diff --git a/common/filetype.h b/src/filetype.h similarity index 64% rename from common/filetype.h rename to src/filetype.h index b2c46578..50b605da 100644 --- a/common/filetype.h +++ b/src/filetype.h @@ -1,7 +1,7 @@ #pragma once -#include "common.h" -#include "../elf/elf.h" +#include "../lib/common.h" +#include "elf.h" namespace mold { @@ -10,21 +10,14 @@ enum class FileType { EMPTY, ELF_OBJ, ELF_DSO, - MACH_OBJ, - MACH_EXE, - MACH_DYLIB, - MACH_BUNDLE, - MACH_UNIVERSAL, AR, THIN_AR, - TAPI, TEXT, GCC_LTO_OBJ, LLVM_BITCODE, }; -template -bool is_text_file(MappedFile *mf) { +inline bool is_text_file(MappedFile *mf) { auto istext = [](char c) { return isprint(c) || c == '\n' || c == '\t'; }; @@ -34,10 +27,8 @@ bool is_text_file(MappedFile *mf) { istext(data[2]) && istext(data[3]); } -template -inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) { - using namespace mold::elf; - +template +inline bool 
is_gcc_lto_obj(MappedFile *mf, bool has_plugin) { const char *data = mf->get_contents().data(); ElfEhdr &ehdr = *(ElfEhdr *)data; ElfShdr *sh_begin = (ElfShdr *)(data + ehdr.e_shoff); @@ -54,7 +45,7 @@ inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) { // the LTO linker plugin is available and falls back as regular // objects otherwise. GCC FAT LTO object can be identified by the // presence of `.gcc.lto_.symtab` section. - if (!ctx.arg.plugin.empty()) { + if (has_plugin) { std::string_view name = data + shdrs[shstrtab_idx].sh_offset + sec.sh_name; if (name.starts_with(".gnu.lto_.symtab.")) return true; @@ -89,11 +80,10 @@ inline bool is_gcc_lto_obj(Context &ctx, MappedFile *mf) { return false; } -template -FileType get_file_type(Context &ctx, MappedFile *mf) { - using namespace elf; - +template +FileType get_file_type(Context &ctx, MappedFile *mf) { std::string_view data = mf->get_contents(); + bool has_plugin = !ctx.arg.plugin.empty(); if (data.empty()) return FileType::EMPTY; @@ -106,10 +96,10 @@ FileType get_file_type(Context &ctx, MappedFile *mf) { if (ehdr.e_type == ET_REL) { if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { - if (is_gcc_lto_obj(ctx, mf)) + if (is_gcc_lto_obj(mf, has_plugin)) return FileType::GCC_LTO_OBJ; } else { - if (is_gcc_lto_obj(ctx, mf)) + if (is_gcc_lto_obj(mf, has_plugin)) return FileType::GCC_LTO_OBJ; } return FileType::ELF_OBJ; @@ -122,10 +112,10 @@ FileType get_file_type(Context &ctx, MappedFile *mf) { if (ehdr.e_type == ET_REL) { if (ehdr.e_ident[EI_CLASS] == ELFCLASS32) { - if (is_gcc_lto_obj(ctx, mf)) + if (is_gcc_lto_obj(mf, has_plugin)) return FileType::GCC_LTO_OBJ; } else { - if (is_gcc_lto_obj(ctx, mf)) + if (is_gcc_lto_obj(mf, has_plugin)) return FileType::GCC_LTO_OBJ; } return FileType::ELF_OBJ; @@ -137,28 +127,10 @@ FileType get_file_type(Context &ctx, MappedFile *mf) { return FileType::UNKNOWN; } - if (data.starts_with("\xcf\xfa\xed\xfe")) { - switch (*(ul32 *)(data.data() + 12)) { - case 1: // MH_OBJECT - return 
FileType::MACH_OBJ; - case 2: // MH_EXECUTE - return FileType::MACH_EXE; - case 6: // MH_DYLIB - return FileType::MACH_DYLIB; - case 8: // MH_BUNDLE - return FileType::MACH_BUNDLE; - } - return FileType::UNKNOWN; - } - if (data.starts_with("!\n")) return FileType::AR; if (data.starts_with("!\n")) return FileType::THIN_AR; - if (data.starts_with("--- !tapi-tbd")) - return FileType::TAPI; - if (data.starts_with("\xca\xfe\xba\xbe")) - return FileType::MACH_UNIVERSAL; if (is_text_file(mf)) return FileType::TEXT; if (data.starts_with("\xde\xc0\x17\x0b")) @@ -168,29 +140,23 @@ FileType get_file_type(Context &ctx, MappedFile *mf) { return FileType::UNKNOWN; } -inline std::string filetype_to_string(FileType type) { - switch (type) { - case FileType::UNKNOWN: return "UNKNOWN"; - case FileType::EMPTY: return "EMPTY"; - case FileType::ELF_OBJ: return "ELF_OBJ"; - case FileType::ELF_DSO: return "ELF_DSO"; - case FileType::MACH_EXE: return "MACH_EXE"; - case FileType::MACH_OBJ: return "MACH_OBJ"; - case FileType::MACH_DYLIB: return "MACH_DYLIB"; - case FileType::MACH_BUNDLE: return "MACH_BUNDLE"; - case FileType::MACH_UNIVERSAL: return "MACH_UNIVERSAL"; - case FileType::AR: return "AR"; - case FileType::THIN_AR: return "THIN_AR"; - case FileType::TAPI: return "TAPI"; - case FileType::TEXT: return "TEXT"; - case FileType::GCC_LTO_OBJ: return "GCC_LTO_OBJ"; - case FileType::LLVM_BITCODE: return "LLVM_BITCODE"; - } - return "UNKNOWN"; -} - inline std::ostream &operator<<(std::ostream &out, FileType type) { - out << filetype_to_string(type); + auto to_string = [&] { + switch (type) { + case FileType::UNKNOWN: return "UNKNOWN"; + case FileType::EMPTY: return "EMPTY"; + case FileType::ELF_OBJ: return "ELF_OBJ"; + case FileType::ELF_DSO: return "ELF_DSO"; + case FileType::AR: return "AR"; + case FileType::THIN_AR: return "THIN_AR"; + case FileType::TEXT: return "TEXT"; + case FileType::GCC_LTO_OBJ: return "GCC_LTO_OBJ"; + case FileType::LLVM_BITCODE: return "LLVM_BITCODE"; + default: 
return "UNKNOWN"; + } + }; + + out << to_string(); return out; } diff --git a/elf/gc-sections.cc b/src/gc-sections.cc similarity index 99% rename from elf/gc-sections.cc rename to src/gc-sections.cc index 34334a7c..efc6cd6d 100644 --- a/elf/gc-sections.cc +++ b/src/gc-sections.cc @@ -7,7 +7,7 @@ #include #include -namespace mold::elf { +namespace mold { template static bool should_keep(const InputSection &isec) { @@ -172,4 +172,4 @@ using E = MOLD_TARGET; template void gc_sections(Context &ctx); -} // namespace mold::elf +} // namespace mold diff --git a/elf/gdb-index.cc b/src/gdb-index.cc similarity index 99% rename from elf/gdb-index.cc rename to src/gdb-index.cc index d13ec49d..a87b7691 100644 --- a/elf/gdb-index.cc +++ b/src/gdb-index.cc @@ -60,7 +60,7 @@ #include #include -namespace mold::elf { +namespace mold { enum DwarfKind { DWARF2_32, DWARF5_32, DWARF2_64, DWARF5_64 }; @@ -791,4 +791,4 @@ using E = MOLD_TARGET; template void write_gdb_index(Context &); -} // namespace mold::elf +} // namespace mold diff --git a/elf/icf.cc b/src/icf.cc similarity index 99% rename from elf/icf.cc rename to src/icf.cc index cc64c626..cdf70760 100644 --- a/elf/icf.cc +++ b/src/icf.cc @@ -65,7 +65,7 @@ // conditions. 
#include "mold.h" -#include "../common/siphash.h" +#include "../lib/siphash.h" #include #include @@ -91,7 +91,7 @@ template <> struct hash { }; } -namespace mold::elf { +namespace mold { static u8 hmac_key[16]; @@ -599,7 +599,7 @@ void icf_sections(Context &ctx) { static Counter eliminated("icf_eliminated"); tbb::parallel_for_each(ctx.objs, [](ObjectFile *file) { for (std::unique_ptr> &isec : file->sections) { - if (isec && isec->is_alive && isec->is_killed_by_icf()) { + if (isec && isec->is_alive && isec->icf_removed()) { isec->kill(); eliminated++; } @@ -612,4 +612,4 @@ using E = MOLD_TARGET; template void icf_sections(Context &ctx); -} // namespace mold::elf +} // namespace mold diff --git a/elf/input-files.cc b/src/input-files.cc similarity index 88% rename from elf/input-files.cc rename to src/input-files.cc index 6d8ad8f4..afe1fc1e 100644 --- a/elf/input-files.cc +++ b/src/input-files.cc @@ -8,7 +8,7 @@ # include #endif -namespace mold::elf { +namespace mold { // If we haven't seen the same `key` before, create a new instance // of Symbol and returns it. 
Otherwise, returns the previously- @@ -243,7 +243,7 @@ static bool is_known_section_type(const ElfShdr &shdr) { return true; if (SHT_LOOS <= ty && ty <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING)) return true; - if (is_x86 && ty == SHT_X86_64_UNWIND) + if (is_x86_64 && ty == SHT_X86_64_UNWIND) return true; if (is_arm32 && (ty == SHT_ARM_EXIDX || ty == SHT_ARM_ATTRIBUTES)) return true; @@ -564,11 +564,15 @@ void ObjectFile::parse_ehframe(Context &ctx) { for (i64 i = 0; i < fdes.size();) { InputSection *isec = get_isec(fdes[i]); assert(isec->fde_begin == -1); - isec->fde_begin = i++; - while (i < fdes.size() && isec == get_isec(fdes[i])) - i++; - isec->fde_end = i; + if (isec->is_alive) { + isec->fde_begin = i++; + while (i < fdes.size() && isec == get_isec(fdes[i])) + i++; + isec->fde_end = i; + } else { + fdes[i++].is_alive = false; + } } } @@ -677,102 +681,27 @@ void ObjectFile::sort_relocations(Context &ctx) { } } -static size_t find_null(std::string_view data, i64 pos, i64 entsize) { - if (entsize == 1) - return data.find('\0', pos); - - for (; pos <= data.size() - entsize; pos += entsize) - if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos) - return pos; - - return data.npos; -} - -// Mergeable sections (sections with SHF_MERGE bit) typically contain -// string literals. Linker is expected to split the section contents -// into null-terminated strings, merge them with mergeable strings -// from other object files, and emit uniquified strings to an output -// file. -// -// This mechanism reduces the size of an output file. If two source -// files happen to contain the same string literal, the output will -// contain only a single copy of it. -// -// It is less common than string literals, but mergeable sections can -// contain fixed-sized read-only records too. -// -// This function splits the section contents into small pieces that we -// call "section fragments". Section fragment is a unit of merging. 
-// -// We do not support mergeable sections that have relocations. template -static std::unique_ptr> -split_section(Context &ctx, InputSection &sec) { - if (!sec.is_alive || sec.relsec_idx != -1 || sec.sh_size == 0) - return nullptr; - - const ElfShdr &shdr = sec.shdr(); - if (!(shdr.sh_flags & SHF_MERGE)) - return nullptr; - - i64 entsize = shdr.sh_entsize; - if (entsize == 0) - entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (int)shdr.sh_addralign; - - if (entsize == 0) - return nullptr; - - i64 addralign = shdr.sh_addralign; - if (addralign == 0) - addralign = 1; - - std::unique_ptr> m(new MergeableSection); - m->parent = MergedSection::get_instance(ctx, sec.name(), shdr.sh_type, - shdr.sh_flags, entsize, addralign); - m->p2align = sec.p2align; - - // If thes section contents are compressed, uncompress them. - sec.uncompress(ctx); - - std::string_view data = sec.contents; - m->contents = sec.contents; - - if (data.size() > UINT32_MAX) - Fatal(ctx) << sec << ": mergeable section too large"; - - // Split sections - if (shdr.sh_flags & SHF_STRINGS) { - for (i64 pos = 0; pos < data.size();) { - m->frag_offsets.push_back(pos); - size_t end = find_null(data, pos, entsize); - if (end == data.npos) - Fatal(ctx) << sec << ": string is not null terminated"; - pos = end + entsize; - } - } else { - if (data.size() % entsize) - Fatal(ctx) << sec << ": section size is not multiple of sh_entsize"; - m->frag_offsets.reserve(data.size() / entsize); +void ObjectFile::convert_mergeable_sections(Context &ctx) { + // Convert InputSections to MergeableSections + for (i64 i = 0; i < this->sections.size(); i++) { + InputSection *isec = this->sections[i].get(); + if (!isec || isec->sh_size == 0 || isec->relsec_idx != -1) + continue; - for (i64 pos = 0; pos < data.size(); pos += entsize) - m->frag_offsets.push_back(pos); - } + const ElfShdr &shdr = isec->shdr(); + if (!(shdr.sh_flags & SHF_MERGE)) + continue; - // Compute hashes for section pieces - HyperLogLog estimator; - 
m->hashes.reserve(m->frag_offsets.size()); + MergedSection *parent = + MergedSection::get_instance(ctx, isec->name(), shdr); - for (i64 i = 0; i < m->frag_offsets.size(); i++) { - u64 hash = hash_string(m->get_contents(i)); - m->hashes.push_back(hash); - estimator.insert(hash); + if (parent) { + this->mergeable_sections[i] = + std::make_unique>(ctx, *parent, this->sections[i]); + this->sections[i] = nullptr; + } } - - m->parent->estimator.merge(estimator); - - static Counter counter("string_fragments"); - counter += m->frag_offsets.size(); - return m; } // Usually a section is an atomic unit of inclusion or exclusion. @@ -811,43 +740,17 @@ split_section(Context &ctx, InputSection &sec) { // section piece in a section, but it doesn't do for any other types // of symbols. // -// In mold, we attach symbols to section pieces. If a relocation refers -// to a section symbol, and that symbol's section is a mergeable one, -// we create a new dummy symbol for a section piece and redirect the -// relocation to this new symbol. If a non-section symbol refers to a -// section piece, the section piece is attached to the symbol. -template -void ObjectFile::initialize_mergeable_sections(Context &ctx) { - mergeable_sections.resize(sections.size()); - - for (i64 i = 0; i < sections.size(); i++) { - if (std::unique_ptr> &isec = sections[i]) { - if (std::unique_ptr> m = split_section(ctx, *isec)) { - mergeable_sections[i] = std::move(m); - isec->is_alive = false; - } - } - } -} - +// Section garbage collection and Identical Code Folding work on graphs +// where sections or section pieces are vertices and relocations are +// edges. To make it easy to handle them, we rewrite symbols and +// relocations so that each non-absolute symbol always refers to either +// a non-mergeable section or a section piece. +// +// We do that only for SHF_ALLOC sections because GC and ICF work only +// on memory-allocated sections. 
Non-memory-allocated mergeable sections +// are not handled here for performance reasons. template -void ObjectFile::resolve_section_pieces(Context &ctx) { - for (std::unique_ptr> &m : mergeable_sections) { - if (m) { - m->fragments.reserve(m->frag_offsets.size()); - - for (i64 i = 0; i < m->frag_offsets.size(); i++) { - SectionFragment *frag = - m->parent->insert(ctx, m->get_contents(i), m->hashes[i], m->p2align); - m->fragments.push_back(frag); - } - - // Reclaim memory as we'll never use this vector again - m->hashes.clear(); - m->hashes.shrink_to_fit(); - } - } - +void ObjectFile::reattach_section_pieces(Context &ctx) { // Attach section pieces to symbols. for (i64 i = 1; i < this->elf_syms.size(); i++) { Symbol &sym = *this->symbols[i]; @@ -856,8 +759,9 @@ void ObjectFile::resolve_section_pieces(Context &ctx) { if (esym.is_abs() || esym.is_common() || esym.is_undef()) continue; - std::unique_ptr> &m = mergeable_sections[get_shndx(esym)]; - if (!m || m->fragments.empty()) + i64 shndx = get_shndx(esym); + std::unique_ptr> &m = mergeable_sections[shndx]; + if (!m || !m->parent.resolved) continue; SectionFragment *frag; @@ -874,49 +778,51 @@ void ObjectFile::resolve_section_pieces(Context &ctx) { // Compute the size of frag_syms. i64 nfrag_syms = 0; for (std::unique_ptr> &isec : sections) - if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) + if (isec && (isec->shdr().sh_flags & SHF_ALLOC)) for (ElfRel &r : isec->get_rels(ctx)) if (const ElfSym &esym = this->elf_syms[r.r_sym]; - esym.st_type == STT_SECTION && mergeable_sections[get_shndx(esym)]) - nfrag_syms++; + esym.st_type == STT_SECTION) + if (mergeable_sections[get_shndx(esym)]) + nfrag_syms++; this->frag_syms.resize(nfrag_syms); - // For each relocation referring a mergeable section symbol, we create - // a new dummy non-section symbol and redirect the relocation to the - // newly-created symbol. 
+ // For each relocation referring to a mergeable section symbol, we + // create a new dummy non-section symbol and redirect the relocation + // to the newly created symbol. i64 idx = 0; for (std::unique_ptr> &isec : sections) { - if (!isec || !isec->is_alive || !(isec->shdr().sh_flags & SHF_ALLOC)) - continue; - - for (ElfRel &r : isec->get_rels(ctx)) { - const ElfSym &esym = this->elf_syms[r.r_sym]; - if (esym.st_type != STT_SECTION) - continue; - - std::unique_ptr> &m = mergeable_sections[get_shndx(esym)]; - if (!m) - continue; - - i64 r_addend = get_addend(*isec, r); - - SectionFragment *frag; - i64 in_frag_offset; - std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend); + if (isec && (isec->shdr().sh_flags & SHF_ALLOC)) { + for (ElfRel &r : isec->get_rels(ctx)) { + const ElfSym &esym = this->elf_syms[r.r_sym]; + if (esym.st_type != STT_SECTION) + continue; - if (!frag) - Fatal(ctx) << *this << ": bad relocation at " << r.r_sym; + i64 shndx = get_shndx(esym); + std::unique_ptr> &m = mergeable_sections[shndx]; + if (!m) + continue; - Symbol &sym = this->frag_syms[idx]; - sym.file = this; - sym.set_name(""); - sym.sym_idx = r.r_sym; - sym.visibility = STV_HIDDEN; - sym.set_frag(frag); - sym.value = in_frag_offset - r_addend; - r.r_sym = this->elf_syms.size() + idx; - idx++; + assert(m->parent.resolved); + + i64 r_addend = get_addend(*isec, r); + SectionFragment *frag; + i64 in_frag_offset; + std::tie(frag, in_frag_offset) = m->get_fragment(esym.st_value + r_addend); + + if (!frag) + Fatal(ctx) << *this << ": bad relocation at " << r.r_sym; + + Symbol &sym = this->frag_syms[idx]; + sym.file = this; + sym.set_name(""); + sym.sym_idx = r.r_sym; + sym.visibility = STV_HIDDEN; + sym.set_frag(frag); + sym.value = in_frag_offset - r_addend; + r.r_sym = this->elf_syms.size() + idx; + idx++; + } } } @@ -929,6 +835,8 @@ void ObjectFile::resolve_section_pieces(Context &ctx) { template void ObjectFile::parse(Context &ctx) { 
sections.resize(this->elf_sections.size()); + mergeable_sections.resize(sections.size()); + symtab_sec = this->find_section(SHT_SYMTAB); if (symtab_sec) { @@ -945,7 +853,6 @@ void ObjectFile::parse(Context &ctx) { initialize_sections(ctx); initialize_symbols(ctx); sort_relocations(ctx); - parse_ehframe(ctx); } // Symbols with higher priorities overwrites symbols with lower priorities. @@ -1142,8 +1049,6 @@ void ObjectFile::convert_common_symbols(Context &ctx) { continue; Symbol &sym = *this->symbols[i]; - std::scoped_lock lock(sym.mu); - if (sym.file != this) { if (ctx.arg.warn_common) Warn(ctx) << *this << ": multiple common symbols: " << sym; @@ -1164,7 +1069,6 @@ void ObjectFile::convert_common_symbols(Context &ctx) { i64 idx = this->elf_sections.size() + elf_sections2.size() - 1; auto isec = std::make_unique>(ctx, *this, idx); - sym.file = this; sym.set_input_section(isec.get()); sym.value = 0; sym.sym_idx = i; @@ -1199,9 +1103,6 @@ static bool should_write_to_local_symtab(Context &ctx, Symbol &sym) { template void ObjectFile::compute_symtab_size(Context &ctx) { - if (ctx.arg.strip_all) - return; - this->output_sym_indices.resize(this->elf_syms.size(), -1); auto is_alive = [&](Symbol &sym) -> bool { @@ -1299,12 +1200,6 @@ SharedFile *SharedFile::create(Context &ctx, MappedFile *mf) { return obj; } -template -SharedFile::SharedFile(Context &ctx, MappedFile *mf) - : InputFile(ctx, mf) { - this->is_alive = !ctx.as_needed; -} - template std::string SharedFile::get_soname(Context &ctx) { if (ElfShdr *sec = this->find_section(SHT_DYNAMIC)) @@ -1367,6 +1262,32 @@ void SharedFile::parse(Context &ctx) { counter += this->elf_syms.size(); } +template +std::vector SharedFile::get_dt_needed(Context &ctx) { + // Get the contents of the dynamic segment + std::span> dynamic; + for (ElfPhdr &phdr : this->get_phdrs()) + if (phdr.p_type == PT_DYNAMIC) + dynamic = {(Word *)(this->mf->data + phdr.p_offset), + (size_t)phdr.p_memsz / sizeof(Word)}; + + // Find a string table + char 
*strtab = nullptr; + for (i64 i = 0; i < dynamic.size(); i += 2) + if (dynamic[i] == DT_STRTAB) + strtab = (char *)this->mf->data + dynamic[i + 1]; + + if (!strtab) + return {}; + + // Find all DT_NEEDED entries + std::vector vec; + for (i64 i = 0; i < dynamic.size(); i += 2) + if (dynamic[i] == DT_NEEDED) + vec.push_back(strtab + dynamic[i + 1]); + return vec; +} + // Symbol versioning is a GNU extension to the ELF file format. I don't // particularly like the feature as it complicates the semantics of // dynamic linking, but we need to support it anyway because it is @@ -1431,7 +1352,8 @@ void SharedFile::resolve_symbols(Context &ctx) { for (i64 i = 0; i < this->symbols.size(); i++) { Symbol &sym = *this->symbols[i]; const ElfSym &esym = this->elf_syms[i]; - if (esym.is_undef()) + + if (esym.is_undef() || sym.skip_dso) continue; std::scoped_lock lock(sym.mu); @@ -1458,7 +1380,7 @@ SharedFile::mark_live_objects(Context &ctx, if (sym.is_traced) print_trace_symbol(ctx, *this, esym, sym); - if (esym.is_undef() && !esym.is_weak() && sym.file && !sym.file->is_dso && + if (esym.is_undef() && !esym.is_weak() && sym.file && !sym.file->is_alive.test_and_set()) { feeder(sym.file); @@ -1524,9 +1446,6 @@ bool SharedFile::is_readonly(Symbol *sym) { template void SharedFile::compute_symtab_size(Context &ctx) { - if (ctx.arg.strip_all) - return; - this->output_sym_indices.resize(this->elf_syms.size(), -1); // Compute the size of global symbols. 
@@ -1576,4 +1495,4 @@ template std::string_view demangle(const Symbol &); template std::ostream &operator<<(std::ostream &, const Symbol &); template std::ostream &operator<<(std::ostream &, const InputFile &); -} // namespace mold::elf +} // namespace mold diff --git a/elf/input-sections.cc b/src/input-sections.cc similarity index 51% rename from elf/input-sections.cc rename to src/input-sections.cc index 8383a7fd..399c80a9 100644 --- a/elf/input-sections.cc +++ b/src/input-sections.cc @@ -4,12 +4,7 @@ #include #include -namespace mold::elf { - -typedef enum { - NONE, ERROR, COPYREL, DYN_COPYREL, PLT, CPLT, DYN_CPLT, DYNREL, - BASEREL, IFUNC_DYNREL, -} Action; +namespace mold { static i64 to_p2align(u64 alignment) { if (alignment == 0) @@ -106,125 +101,31 @@ void InputSection::copy_contents(Context &ctx, u8 *buf) { } } -template -static bool -is_relr_reloc(Context &ctx, InputSection &isec, const ElfRel &rel) { - ElfShdr shdr = isec.shdr(); - return ctx.arg.pack_dyn_relocs_relr && - !(shdr.sh_flags & SHF_EXECINSTR) && - shdr.sh_addralign % sizeof(Word) == 0 && - rel.r_offset % sizeof(Word) == 0; -} +typedef enum : u8 { NONE, ERROR, COPYREL, PLT, CPLT } Action; template -static void scan_rel(Context &ctx, InputSection &isec, Symbol &sym, - const ElfRel &rel, Action action) { - bool writable = (isec.shdr().sh_flags & SHF_WRITE); - - auto error = [&] { - std::string msg = sym.is_absolute() ? 
"-fno-PIC" : "-fPIC"; - Error(ctx) << isec << ": " << rel << " relocation at offset 0x" - << std::hex << rel.r_offset << " against symbol `" - << sym << "' can not be used; recompile with " << msg; - }; - - auto check_textrel = [&] { - if (!writable) { - if (ctx.arg.z_text) { - error(); - } else if (ctx.arg.warn_textrel) { - Warn(ctx) << isec << ": relocation against symbol `" << sym - << "' in read-only section"; - } - ctx.has_textrel = true; - } - }; - - auto copyrel = [&] { - assert(sym.is_imported); - if (sym.esym().st_visibility == STV_PROTECTED) { - Error(ctx) << isec - << ": cannot make copy relocation for protected symbol '" << sym - << "', defined in " << *sym.file << "; recompile with -fPIC"; - } - sym.flags |= NEEDS_COPYREL; - }; - - auto dynrel = [&] { - check_textrel(); - isec.file.num_dynrel++; - }; - +static void do_action(Context &ctx, Action action, InputSection &isec, + Symbol &sym, const ElfRel &rel) { switch (action) { case NONE: break; case ERROR: - // Print out the "recompile with -fPIC" error message. - error(); + Error(ctx) << isec << ": " << rel << " relocation at offset 0x" + << std::hex << rel.r_offset << " against symbol `" + << sym << "' can not be used; recompile with -fPIC"; break; case COPYREL: - // Create a copy relocation. - if (!ctx.arg.z_copyreloc) - error(); - copyrel(); - break; - case DYN_COPYREL: - // Same as COPYREL but try to avoid creating a copy relocation by - // creating a dynamic relocation instead if the relocation is in - // a writable section. - // - // GHC (Glasgow Haskell Compiler) places a small amount of data in - // .text before each function and access that data with a fixed - // offset. The function breaks if we copy-relocate the data. For such - // programs, we should avoid copy relocations if possible. - // - // Besides GHC, copy relocation is a hacky solution, so if we can - // represent a relocation either with copyrel or dynrel, we prefer - // dynamic relocation. 
- if (writable || !ctx.arg.z_copyreloc) - dynrel(); - else - copyrel(); + // Create a copy relocation + sym.flags |= NEEDS_COPYREL; break; case PLT: - // Create a PLT entry. + // Create a PLT entry sym.flags |= NEEDS_PLT; break; case CPLT: - // Create a canonical PLT entry. + // Create a canonical PLT entry sym.flags |= NEEDS_CPLT; break; - case DYN_CPLT: - // Same as CPLT but try to avoid creating a canonical PLT creating by - // creating a dynamic relocation instead if the relocation is in a - // writable section. The motivation behind it is hte same as DYN_COPYREL. - if (writable) - dynrel(); - else - sym.flags |= NEEDS_CPLT; - break; - case DYNREL: - // Create a dynamic relocation. - dynrel(); - break; - case BASEREL: - // Create a base relocation. - check_textrel(); - if (!is_relr_reloc(ctx, isec, rel)) - isec.file.num_dynrel++; - break; - case IFUNC_DYNREL: - // Create an IRELATIVE relocation for a GNU ifunc symbol. - // - // We usually create an IRELATIVE relocation in .got for each ifunc. - // However, if a statically-initialized pointer is initialized to an - // ifunc's address, we have no choice other than emitting an IRELATIVE - // relocation for each such pointer. - dynrel(); - ctx.num_ifunc_dynrels++; - break; - default: - unreachable(); } } @@ -249,102 +150,44 @@ static inline i64 get_sym_type(Symbol &sym) { } template -static Action get_pcrel_action(Context &ctx, Symbol &sym) { +void InputSection::scan_pcrel(Context &ctx, Symbol &sym, + const ElfRel &rel) { // This is for PC-relative relocations (e.g. R_X86_64_PC32). // We cannot promote them to dynamic relocations because the dynamic // linker generally does not support PC-relative relocations. 
- static Action table[3][4] = { + static Action table[][4] = { // Absolute Local Imported data Imported code { ERROR, NONE, ERROR, PLT }, // Shared object - { ERROR, NONE, COPYREL, PLT }, // Position-independent exec + { ERROR, NONE, COPYREL, CPLT }, // Position-independent exec { NONE, NONE, COPYREL, CPLT }, // Position-dependent exec }; - return table[get_output_type(ctx)][get_sym_type(sym)]; + Action action = table[get_output_type(ctx)][get_sym_type(sym)]; + do_action(ctx, action, *this, sym, rel); } template -static Action get_absrel_action(Context &ctx, Symbol &sym) { +void InputSection::scan_absrel(Context &ctx, Symbol &sym, + const ElfRel &rel) { // This is a decision table for absolute relocations that is smaller // than the pointer size (e.g. R_X86_64_32). Since the dynamic linker // generally does not support dynamic relocations smaller than the // pointer size, we need to report an error if a relocation cannot be // resolved at link-time. - static Action table[3][4] = { + static Action table[][4] = { // Absolute Local Imported data Imported code { NONE, ERROR, ERROR, ERROR }, // Shared object { NONE, ERROR, ERROR, ERROR }, // Position-independent exec { NONE, NONE, COPYREL, CPLT }, // Position-dependent exec }; - return table[get_output_type(ctx)][get_sym_type(sym)]; -} - -template -static Action get_dyn_absrel_action(Context &ctx, Symbol &sym) { - if (sym.is_ifunc()) - return sym.is_pde_ifunc(ctx) ? NONE : IFUNC_DYNREL; - - // This is a decision table for absolute relocations for the pointer - // size data (e.g. R_X86_64_64). Unlike the absrel_table, we can emit - // a dynamic relocation if we cannot resolve an address at link-time. 
- static Action table[3][4] = { - // Absolute Local Imported data Imported code - { NONE, BASEREL, DYNREL, DYNREL }, // Shared object - { NONE, BASEREL, DYNREL, DYNREL }, // Position-independent exec - { NONE, NONE, DYN_COPYREL, DYN_CPLT }, // Position-dependent exec - }; - - return table[get_output_type(ctx)][get_sym_type(sym)]; -} - -template -static Action get_ppc64_toc_action(Context &ctx, Symbol &sym) { - if (sym.is_ifunc()) - return IFUNC_DYNREL; - - // As a special case, we do not create copy relocations nor canonical - // PLTs for .toc sections. PPC64's .toc is a compiler-generated - // GOT-like section, and no user-generated code directly uses values - // in it. - static Action table[3][4] = { - // Absolute Local Imported data Imported code - { NONE, BASEREL, DYNREL, DYNREL }, // Shared object - { NONE, BASEREL, DYNREL, DYNREL }, // Position-independent exec - { NONE, NONE, DYNREL, DYNREL }, // Position-dependent exec - }; - - return table[get_output_type(ctx)][get_sym_type(sym)]; -} - -template -void InputSection::scan_pcrel(Context &ctx, Symbol &sym, - const ElfRel &rel) { - scan_rel(ctx, *this, sym, rel, get_pcrel_action(ctx, sym)); -} - -template -void InputSection::scan_absrel(Context &ctx, Symbol &sym, - const ElfRel &rel) { - scan_rel(ctx, *this, sym, rel, get_absrel_action(ctx, sym)); -} - -template -void InputSection::scan_dyn_absrel(Context &ctx, Symbol &sym, - const ElfRel &rel) { - scan_rel(ctx, *this, sym, rel, get_dyn_absrel_action(ctx, sym)); -} - -template -void InputSection::scan_toc_rel(Context &ctx, Symbol &sym, - const ElfRel &rel) { - scan_rel(ctx, *this, sym, rel, get_ppc64_toc_action(ctx, sym)); + Action action = table[get_output_type(ctx)][get_sym_type(sym)]; + do_action(ctx, action, *this, sym, rel); } template void InputSection::scan_tlsdesc(Context &ctx, Symbol &sym) { - if (ctx.arg.is_static || - (ctx.arg.relax && sym.is_tprel_linktime_const(ctx))) { + if (ctx.arg.static_ || (ctx.arg.relax && sym.is_tprel_linktime_const(ctx))) { 
// Relax TLSDESC to Local Exec. In this case, we directly materialize // a TP-relative offset, so no dynamic relocation is needed. // @@ -373,92 +216,41 @@ void InputSection::check_tlsle(Context &ctx, Symbol &sym, << " recompile with -fPIC"; } -template -static void apply_absrel(Context &ctx, InputSection &isec, - Symbol &sym, const ElfRel &rel, u8 *loc, - u64 S, i64 A, u64 P, ElfRel *&dynrel, - Action action) { - bool writable = (isec.shdr().sh_flags & SHF_WRITE); - - auto emit_abs_dynrel = [&] { - *dynrel++ = ElfRel(P, E::R_ABS, sym.get_dynsym_idx(ctx), A); - if (ctx.arg.apply_dynamic_relocs) - *(Word *)loc = A; - }; - - switch (action) { - case COPYREL: - case CPLT: - case NONE: - *(Word *)loc = S + A; - break; - case BASEREL: - if (is_relr_reloc(ctx, isec, rel)) { - *(Word *)loc = S + A; - } else { - *dynrel++ = ElfRel(P, E::R_RELATIVE, 0, S + A); - if (ctx.arg.apply_dynamic_relocs) - *(Word *)loc = S + A; - } - break; - case DYN_COPYREL: - if (writable || !ctx.arg.z_copyreloc) - emit_abs_dynrel(); - else - *(Word *)loc = S + A; - break; - case DYN_CPLT: - if (writable) - emit_abs_dynrel(); - else - *(Word *)loc = S + A; - break; - case DYNREL: - emit_abs_dynrel(); - break; - case IFUNC_DYNREL: - if constexpr (supports_ifunc) { - u64 addr = sym.get_addr(ctx, NO_PLT) + A; - *dynrel++ = ElfRel(P, E::R_IRELATIVE, 0, addr); - if (ctx.arg.apply_dynamic_relocs) - *(Word *)loc = addr; - } else { - unreachable(); - } - break; - default: - unreachable(); - } -} - -template -void InputSection::apply_dyn_absrel(Context &ctx, Symbol &sym, - const ElfRel &rel, u8 *loc, - u64 S, i64 A, u64 P, - ElfRel **dynrel) { - apply_absrel(ctx, *this, sym, rel, loc, S, A, P, *dynrel, - get_dyn_absrel_action(ctx, sym)); -} - -template -void InputSection::apply_toc_rel(Context &ctx, Symbol &sym, - const ElfRel &rel, u8 *loc, - u64 S, i64 A, u64 P, - ElfRel **dynrel) { - apply_absrel(ctx, *this, sym, rel, loc, S, A, P, *dynrel, - get_ppc64_toc_action(ctx, sym)); -} - template void 
InputSection::write_to(Context &ctx, u8 *buf) { if (shdr().sh_type == SHT_NOBITS || sh_size == 0) return; - // Copy data - if constexpr (is_riscv) - copy_contents_riscv(ctx, buf); - else + // Copy data. In RISC-V and LoongArch object files, sections are not + // atomic unit of copying because of relaxation. That is, some + // relocations are allowed to remove bytes from the middle of a + // section and shrink the overall size of it. + if constexpr (is_riscv || is_loongarch) { + if (extra.r_deltas.empty()) { + // If a section is not relaxed, we can copy it as a one big chunk. + copy_contents(ctx, buf); + } else { + // A relaxed section is copied piece-wise. + std::span> rels = get_rels(ctx); + u8 *buf2 = buf; + i64 pos = 0; + + for (i64 i = 0; i < rels.size(); i++) { + i64 delta = extra.r_deltas[i + 1] - extra.r_deltas[i]; + if (delta == 0) + continue; + assert(delta > 0); + + const ElfRel &r = rels[i]; + memcpy(buf2, contents.data() + pos, r.r_offset - pos); + buf2 += r.r_offset - pos; + pos = r.r_offset + delta; + } + memcpy(buf2, contents.data() + pos, contents.size() - pos); + } + } else { copy_contents(ctx, buf); + } // Apply relocations if (!ctx.arg.relocatable) { @@ -474,12 +266,14 @@ template std::string_view InputSection::get_func_name(Context &ctx, i64 offset) const { for (Symbol *sym : file.symbols) { - const ElfSym &esym = sym->esym(); - if (esym.st_shndx == shndx && esym.st_type == STT_FUNC && - esym.st_value <= offset && offset < esym.st_value + esym.st_size) { - if (ctx.arg.demangle) - return demangle(*sym); - return sym->name(); + if (sym->file == &file) { + const ElfSym &esym = sym->esym(); + if (esym.st_shndx == shndx && esym.st_type == STT_FUNC && + esym.st_value <= offset && offset < esym.st_value + esym.st_size) { + if (ctx.arg.demangle) + return demangle(*sym); + return sym->name(); + } } } return ""; @@ -530,6 +324,7 @@ bool InputSection::record_undef_error(Context &ctx, const ElfRel &rel) // Every ELF file has an absolute local symbol as its 
first symbol. // Referring to that symbol is always valid. bool is_undef = esym.is_undef() && !esym.is_weak() && sym.sym_idx; + if (is_undef && sym.esym().is_undef()) { if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR && !sym.is_imported) { record(); @@ -541,20 +336,105 @@ bool InputSection::record_undef_error(Context &ctx, const ElfRel &rel) } } - // If a protected/hidden undefined symbol is resolved to other .so, - // it's handled as if no symbols were found. - if (sym.file->is_dso && - (sym.visibility == STV_PROTECTED || sym.visibility == STV_HIDDEN)) { - record(); - return true; + return false; +} + +template +MergeableSection::MergeableSection(Context &ctx, MergedSection &parent, + std::unique_ptr> &isec) + : parent(parent), section(std::move(isec)), p2align(section->p2align) { + section->uncompress(ctx); + + std::scoped_lock lock(parent.mu); + parent.members.push_back(this); +} + +static size_t find_null(std::string_view data, i64 pos, i64 entsize) { + if (entsize == 1) + return data.find('\0', pos); + + for (; pos <= data.size() - entsize; pos += entsize) + if (data.substr(pos, entsize).find_first_not_of('\0') == data.npos) + return pos; + + return data.npos; +} + +// Mergeable sections (sections with SHF_MERGE bit) typically contain +// string literals. Linker is expected to split the section contents +// into null-terminated strings, merge them with mergeable strings +// from other object files, and emit uniquified strings to an output +// file. +// +// This mechanism reduces the size of an output file. If two source +// files happen to contain the same string literal, the output will +// contain only a single copy of it. +// +// It is less common than string literals, but mergeable sections can +// contain fixed-sized read-only records too. +// +// This function splits the section contents into small pieces that we +// call "section fragments". Section fragment is a unit of merging. +// +// We do not support mergeable sections that have relocations. 
+template +void MergeableSection::split_contents(Context &ctx) { + std::string_view data = section->contents; + if (data.size() > UINT32_MAX) + Fatal(ctx) << *section + << ": mergeable section too large"; + + i64 entsize = parent.shdr.sh_entsize; + + // Split sections + if (parent.shdr.sh_flags & SHF_STRINGS) { + for (i64 pos = 0; pos < data.size();) { + frag_offsets.push_back(pos); + size_t end = find_null(data, pos, entsize); + if (end == data.npos) + Fatal(ctx) << *section << ": string is not null terminated"; + pos = end + entsize; + } + } else { + if (data.size() % entsize) + Fatal(ctx) << *section << ": section size is not multiple of sh_entsize"; + frag_offsets.reserve(data.size() / entsize); + + for (i64 pos = 0; pos < data.size(); pos += entsize) + frag_offsets.push_back(pos); } - return false; + // Compute hashes for section pieces + HyperLogLog estimator; + hashes.reserve(frag_offsets.size()); + + for (i64 i = 0; i < frag_offsets.size(); i++) { + u64 hash = hash_string(get_contents(i)); + hashes.push_back(hash); + estimator.insert(hash); + } + + parent.estimator.merge(estimator); + + static Counter counter("string_fragments"); + counter += frag_offsets.size(); +} + +template +void MergeableSection::resolve_contents(Context &ctx) { + fragments.reserve(frag_offsets.size()); + for (i64 i = 0; i < frag_offsets.size(); i++) + fragments.push_back(parent.insert(ctx, get_contents(i), hashes[i], p2align)); + + // Reclaim memory as we'll never use this vector again + hashes.clear(); + hashes.shrink_to_fit(); } using E = MOLD_TARGET; template bool cie_equals(const CieRecord &, const CieRecord &); template class InputSection; +template class MergeableSection; -} // namespace mold::elf +} // namespace mold diff --git a/elf/linker-script.cc b/src/linker-script.cc similarity index 51% rename from elf/linker-script.cc rename to src/linker-script.cc index 28aedad9..6fe5dab6 100644 --- a/elf/linker-script.cc +++ b/src/linker-script.cc @@ -8,10 +8,7 @@ #include #include 
-namespace mold::elf { - -template -void read_version_script(Context &ctx, std::span &tok); +namespace mold { static std::string_view get_line(std::string_view input, const char *pos) { assert(input.data() <= pos); @@ -31,40 +28,27 @@ static std::string_view get_line(std::string_view input, const char *pos) { } template -class SyntaxError { -public: - SyntaxError(Context &ctx, std::string_view errpos) : out(ctx) { - std::string_view contents = ctx.script_file->get_contents(); - std::string_view line = get_line(contents, errpos.data()); - - i64 lineno = 1; - for (i64 i = 0; contents.data() + i < line.data(); i++) - if (contents[i] == '\n') - lineno++; - - std::string label = ctx.script_file->name + ":" + - std::to_string(lineno) + ": "; - i64 indent = strlen("mold: fatal: ") + label.size(); - i64 column = errpos.data() - line.data(); - - out << label << line << "\n" - << std::string(indent + column, ' ') << "^ "; - } +void Script::error(std::string_view pos, std::string msg) { + std::string_view input = mf->get_contents(); + std::string_view line = get_line(input, pos.data()); - template SyntaxError &operator<<(T &&val) { - out << std::forward(val); - return *this; - } + i64 lineno = 1; + for (i64 i = 0; input.data() + i < line.data(); i++) + if (input[i] == '\n') + lineno++; - [[noreturn]] ~SyntaxError() = default; + std::string label = mf->name + ":" + std::to_string(lineno) + ": "; + i64 indent = strlen("mold: fatal: ") + label.size(); + i64 column = pos.data() - line.data(); - Fatal> out; -}; + Fatal(ctx) << label << line << "\n" + << std::string(indent + column, ' ') << "^ " << msg; +} template -static std::vector -tokenize(Context &ctx, std::string_view input) { - std::vector vec; +void Script::tokenize() { + std::string_view input = mf->get_contents(); + while (!input.empty()) { if (isspace(input[0])) { input = input.substr(1); @@ -74,7 +58,7 @@ tokenize(Context &ctx, std::string_view input) { if (input.starts_with("/*")) { i64 pos = input.find("*/", 2); if 
(pos == std::string_view::npos) - SyntaxError(ctx, input) << "unclosed comment"; + error(input, "unclosed comment"); input = input.substr(pos + 2); continue; } @@ -90,8 +74,8 @@ tokenize(Context &ctx, std::string_view input) { if (input[0] == '"') { i64 pos = input.find('"', 1); if (pos == std::string_view::npos) - SyntaxError(ctx, input) << "unclosed string literal"; - vec.push_back(input.substr(0, pos + 1)); + error(input, "unclosed string literal"); + tokens.push_back(input.substr(0, pos + 1)); input = input.substr(pos + 1); continue; } @@ -105,20 +89,18 @@ tokenize(Context &ctx, std::string_view input) { else if (pos == input.npos) pos = input.size(); - vec.push_back(input.substr(0, pos)); + tokens.push_back(input.substr(0, pos)); input = input.substr(pos); } - return vec; } template -static std::span -skip(Context &ctx, std::span tok, std::string_view str) { +std::span +Script::skip(std::span tok, std::string_view str) { if (tok.empty()) - Fatal(ctx) << ctx.script_file->name << ": expected '" << str - << "', but got EOF"; + Fatal(ctx) << mf->name << ": expected '" << str << "', but got EOF"; if (tok[0] != str) - SyntaxError(ctx, tok[0]) << "expected '" << str << "'"; + error(tok[0], "expected '" + std::string(str) + "'"); return tok.subspan(1); } @@ -131,13 +113,13 @@ static std::string_view unquote(std::string_view s) { } template -static std::span -read_output_format(Context &ctx, std::span tok) { - tok = skip(ctx, tok, "("); +std::span +Script::read_output_format(std::span tok) { + tok = skip(tok, "("); while (!tok.empty() && tok[0] != ")") tok = tok.subspan(1); if (tok.empty()) - Fatal(ctx) << ctx.script_file->name << ": expected ')', but got EOF"; + Fatal(ctx) << mf->name << ": expected ')', but got EOF"; return tok.subspan(1); } @@ -149,8 +131,7 @@ static bool is_in_sysroot(Context &ctx, std::string path) { } template -static MappedFile * -resolve_path(Context &ctx, std::string_view tok, bool check_target) { +MappedFile 
*Script::resolve_path(std::string_view tok, bool check_target) { std::string str(unquote(tok)); auto open = [&](const std::string &path) -> MappedFile * { @@ -159,7 +140,7 @@ resolve_path(Context &ctx, std::string_view tok, bool check_target) { return nullptr; if (check_target) { - std::string_view target = get_machine_type(ctx, mf); + std::string_view target = get_machine_type(ctx, rctx, mf); if (!target.empty() && target != E::target_name) { Warn(ctx) << path << ": skipping incompatible file: " << target << " (e_machine " << (int)E::e_machine << ")"; @@ -171,7 +152,7 @@ resolve_path(Context &ctx, std::string_view tok, bool check_target) { // GNU ld prepends the sysroot if a pathname starts with '/' and the // script being processed is in the sysroot. We do the same. - if (str.starts_with('/') && is_in_sysroot(ctx, ctx.script_file->name)) + if (str.starts_with('/') && is_in_sysroot(ctx, mf->name)) return must_open_file(ctx, ctx.arg.sysroot + str); if (str.starts_with('=')) { @@ -184,11 +165,11 @@ resolve_path(Context &ctx, std::string_view tok, bool check_target) { } if (str.starts_with("-l")) - return find_library(ctx, str.substr(2)); + return find_library(ctx, rctx, str.substr(2)); if (!str.starts_with('/')) - if (MappedFile *mf = open(path_clean(ctx.script_file->name + "/../" + str))) - return mf; + if (MappedFile *mf2 = open(path_clean(mf->name + "/../" + str))) + return mf2; if (MappedFile *mf = open(str)) return mf; @@ -199,50 +180,48 @@ resolve_path(Context &ctx, std::string_view tok, bool check_target) { return mf; } - SyntaxError(ctx, tok) << "library not found: " << str; + error(tok, "library not found: " + str); } template -static std::span -read_group(Context &ctx, std::span tok) { - tok = skip(ctx, tok, "("); +std::span +Script::read_group(std::span tok) { + tok = skip(tok, "("); while (!tok.empty() && tok[0] != ")") { if (tok[0] == "AS_NEEDED") { - bool orig = ctx.as_needed; - ctx.as_needed = true; - tok = read_group(ctx, tok.subspan(1)); - 
ctx.as_needed = orig; + bool orig = rctx.as_needed; + rctx.as_needed = true; + tok = read_group(tok.subspan(1)); + rctx.as_needed = orig; continue; } - MappedFile *mf = resolve_path(ctx, tok[0], true); - read_file(ctx, mf); + MappedFile *mf = resolve_path(tok[0], true); + read_file(ctx, rctx, mf); tok = tok.subspan(1); } if (tok.empty()) - Fatal(ctx) << ctx.script_file->name << ": expected ')', but got EOF"; + Fatal(ctx) << mf->name << ": expected ')', but got EOF"; return tok.subspan(1); } template -void parse_linker_script(Context &ctx, MappedFile *mf) { - ctx.script_file = mf; - - std::vector vec = tokenize(ctx, mf->get_contents()); - std::span tok = vec; +void Script::parse_linker_script() { + std::call_once(once, [&] { tokenize(); }); + std::span tok = tokens; while (!tok.empty()) { if (tok[0] == "OUTPUT_FORMAT") { - tok = read_output_format(ctx, tok.subspan(1)); + tok = read_output_format(tok.subspan(1)); } else if (tok[0] == "INPUT" || tok[0] == "GROUP") { - tok = read_group(ctx, tok.subspan(1)); + tok = read_group(tok.subspan(1)); } else if (tok[0] == "VERSION") { tok = tok.subspan(1); - tok = skip(ctx, tok, "{"); - read_version_script(ctx, tok); - tok = skip(ctx, tok, "}"); + tok = skip(tok, "{"); + tok = read_version_script(tok); + tok = skip(tok, "}"); } else if (tok.size() > 3 && tok[1] == "=" && tok[3] == ";") { ctx.arg.defsyms.emplace_back(get_symbol(ctx, unquote(tok[0])), get_symbol(ctx, unquote(tok[2]))); @@ -250,18 +229,15 @@ void parse_linker_script(Context &ctx, MappedFile *mf) { } else if (tok[0] == ";") { tok = tok.subspan(1); } else { - SyntaxError(ctx, tok[0]) << "unknown linker script token"; + error(tok[0], "unknown linker script token"); } } } template -std::string_view -get_script_output_type(Context &ctx, MappedFile *mf) { - ctx.script_file = mf; - - std::vector vec = tokenize(ctx, mf->get_contents()); - std::span tok = vec; +std::string_view Script::get_script_output_type() { + std::call_once(once, [&] { tokenize(); }); + std::span tok 
= tokens; if (tok.size() >= 3 && tok[0] == "OUTPUT_FORMAT" && tok[1] == "(") { if (tok[2] == "elf64-x86-64") @@ -272,14 +248,12 @@ get_script_output_type(Context &ctx, MappedFile *mf) { if (tok.size() >= 3 && (tok[0] == "INPUT" || tok[0] == "GROUP") && tok[1] == "(") - if (MappedFile *mf = resolve_path(ctx, tok[2], false)) - return get_machine_type(ctx, mf); - + if (MappedFile *mf = resolve_path(tok[2], false)) + return get_machine_type(ctx, rctx, mf); return ""; } -static bool read_label(std::span &tok, - std::string label) { +static bool read_label(std::span &tok, std::string label) { if (tok.size() >= 1 && tok[0] == label + ":") { tok = tok.subspan(1); return true; @@ -293,10 +267,10 @@ static bool read_label(std::span &tok, } template -static void -read_version_script_commands(Context &ctx, std::span &tok, - std::string_view ver_str, u16 ver_idx, - bool is_global, bool is_cpp) { +std::span +Script::read_version_script_commands(std::span tok, + std::string_view ver_str, u16 ver_idx, + bool is_global, bool is_cpp) { while (!tok.empty() && tok[0] != "}") { if (read_label(tok, "global")) { is_global = true; @@ -313,39 +287,41 @@ read_version_script_commands(Context &ctx, std::span &tok, if (!tok.empty() && tok[0] == "\"C\"") { tok = tok.subspan(1); - tok = skip(ctx, tok, "{"); - read_version_script_commands( ctx, tok, ver_str, ver_idx, is_global, false); + tok = skip(tok, "{"); + tok = read_version_script_commands(tok, ver_str, ver_idx, is_global, false); } else { - tok = skip(ctx, tok, "\"C++\""); - tok = skip(ctx, tok, "{"); - read_version_script_commands(ctx, tok, ver_str, ver_idx, is_global, true); + tok = skip(tok, "\"C++\""); + tok = skip(tok, "{"); + tok = read_version_script_commands(tok, ver_str, ver_idx, is_global, true); } - tok = skip(ctx, tok, "}"); - tok = skip(ctx, tok, ";"); + tok = skip(tok, "}"); + tok = skip(tok, ";"); continue; } if (tok[0] == "*") { ctx.default_version = (is_global ? 
ver_idx : (u32)VER_NDX_LOCAL); } else if (is_global) { - ctx.version_patterns.push_back({unquote(tok[0]), ctx.script_file->name, - ver_str, ver_idx, is_cpp}); + ctx.version_patterns.push_back({unquote(tok[0]), mf->name, ver_str, + ver_idx, is_cpp}); } else { - ctx.version_patterns.push_back({unquote(tok[0]), ctx.script_file->name, - ver_str, VER_NDX_LOCAL, is_cpp}); + ctx.version_patterns.push_back({unquote(tok[0]), mf->name, ver_str, + VER_NDX_LOCAL, is_cpp}); } tok = tok.subspan(1); if (!tok.empty() && tok[0] == "}") - return; - tok = skip(ctx, tok, ";"); + break; + tok = skip(tok, ";"); } + return tok; } template -void read_version_script(Context &ctx, std::span &tok) { +std::span +Script::read_version_script(std::span tok) { u16 next_ver = VER_NDX_LAST_RESERVED + ctx.arg.version_definitions.size() + 1; while (!tok.empty() && tok[0] != "}") { @@ -362,83 +338,87 @@ void read_version_script(Context &ctx, std::span &tok) { tok = tok.subspan(1); } - tok = skip(ctx, tok, "{"); - read_version_script_commands(ctx, tok, ver_str, ver_idx, true, false); - tok = skip(ctx, tok, "}"); + tok = skip(tok, "{"); + tok = read_version_script_commands(tok, ver_str, ver_idx, true, false); + tok = skip(tok, "}"); if (!tok.empty() && tok[0] != ";") tok = tok.subspan(1); - tok = skip(ctx, tok, ";"); + tok = skip(tok, ";"); } + return tok; } template -void parse_version_script(Context &ctx, MappedFile *mf) { - ctx.script_file = mf; - std::vector vec = tokenize(ctx, mf->get_contents()); - std::span tok = vec; - read_version_script(ctx, tok); +void Script::parse_version_script() { + std::call_once(once, [&] { tokenize(); }); + std::span tok = tokens; + tok = read_version_script(tok); if (!tok.empty()) - SyntaxError(ctx, tok[0]) << "trailing garbage token"; + error(tok[0], "trailing garbage token"); } template -void read_dynamic_list_commands(Context &ctx, - std::vector &result, - std::span &tok, - bool is_cpp) { +std::span +Script::read_dynamic_list_commands(std::span tok, + std::vector 
&result, + bool is_cpp) { while (!tok.empty() && tok[0] != "}") { if (tok[0] == "extern") { tok = tok.subspan(1); if (!tok.empty() && tok[0] == "\"C\"") { tok = tok.subspan(1); - tok = skip(ctx, tok, "{"); - read_dynamic_list_commands(ctx, result, tok, false); + tok = skip(tok, "{"); + tok = read_dynamic_list_commands(tok, result, false); } else { - tok = skip(ctx, tok, "\"C++\""); - tok = skip(ctx, tok, "{"); - read_dynamic_list_commands(ctx, result, tok, true); + tok = skip(tok, "\"C++\""); + tok = skip(tok, "{"); + tok = read_dynamic_list_commands(tok, result, true); } - tok = skip(ctx, tok, "}"); - tok = skip(ctx, tok, ";"); + tok = skip(tok, "}"); + tok = skip(tok, ";"); continue; } result.push_back({unquote(tok[0]), "", is_cpp}); - tok = skip(ctx, tok.subspan(1), ";"); + tok = skip(tok.subspan(1), ";"); } + return tok; } template -std::vector -parse_dynamic_list(Context &ctx, std::string_view path) { - std::string_view contents = - must_open_file(ctx, std::string(path))->get_contents(); - std::vector vec = tokenize(ctx, contents); - std::span tok = vec; +std::vector Script::parse_dynamic_list() { + std::call_once(once, [&] { tokenize(); }); + std::span tok = tokens; std::vector result; - tok = skip(ctx, tok, "{"); - read_dynamic_list_commands(ctx, result, tok, false); - tok = skip(ctx, tok, "}"); - tok = skip(ctx, tok, ";"); + tok = skip(tok, "{"); + tok = read_dynamic_list_commands(tok, result, false); + tok = skip(tok, "}"); + tok = skip(tok, ";"); if (!tok.empty()) - SyntaxError(ctx, tok[0]) << "trailing garbage token"; + error(tok[0], "trailing garbage token"); for (DynamicPattern &p : result) - p.source = path; - + p.source = mf->name; return result; } +template +std::vector +parse_dynamic_list(Context &ctx, std::string_view path) { + ReaderContext rctx; + MappedFile *mf = must_open_file(ctx, std::string(path)); + return Script(ctx, rctx, mf).parse_dynamic_list(); +} + using E = MOLD_TARGET; -template void parse_linker_script(Context &, MappedFile *); 
-template std::string_view get_script_output_type(Context &, MappedFile *); -template void parse_version_script(Context &, MappedFile *); -template std::vector parse_dynamic_list(Context &, std::string_view); +template class Script; +template +std::vector parse_dynamic_list(Context &, std::string_view); -} // namespace mold::elf +} // namespace mold diff --git a/elf/lto-unix.cc b/src/lto-unix.cc similarity index 98% rename from elf/lto-unix.cc rename to src/lto-unix.cc index e7c22e71..740842f8 100644 --- a/elf/lto-unix.cc +++ b/src/lto-unix.cc @@ -95,7 +95,7 @@ # define LOG std::ostringstream() #endif -namespace mold::elf { +namespace mold { // Global variables // We store LTO-related information to global variables, @@ -567,11 +567,7 @@ static ElfSym to_elf_sym(PluginSymbol &psym) { // Returns false if it's GCC. template static bool is_llvm(Context &ctx) { -#ifdef __MINGW32__ - return ctx.arg.plugin.ends_with("LLVMgold.dll"); -#else - return ctx.arg.plugin.ends_with("LLVMgold.so"); -#endif + return ctx.arg.plugin.find("LLVMgold.") != ctx.arg.plugin.npos; } // Returns true if a given linker plugin supports the get_symbols_v3 API. 
@@ -678,8 +674,8 @@ ObjectFile *read_lto_object(Context &ctx, MappedFile *mf) { // Entry point template -std::vector *> do_lto(Context &ctx) { - Timer t(ctx, "do_lto"); +std::vector *> run_lto_plugin(Context &ctx) { + Timer t(ctx, "run_lto_plugin"); load_lto_plugin(ctx); if (!ctx.arg.lto_pass2 && !supports_v3_api(ctx)) @@ -747,7 +743,7 @@ void lto_cleanup(Context &ctx) { using E = MOLD_TARGET; template ObjectFile *read_lto_object(Context &, MappedFile *); -template std::vector *> do_lto(Context &); +template std::vector *> run_lto_plugin(Context &); template void lto_cleanup(Context &); -} // namespace mold::elf +} // namespace mold diff --git a/elf/lto-win32.cc b/src/lto-win32.cc similarity index 71% rename from elf/lto-win32.cc rename to src/lto-win32.cc index 456e406f..f5d17eec 100644 --- a/elf/lto-win32.cc +++ b/src/lto-win32.cc @@ -1,7 +1,7 @@ #include "mold.h" #include "lto.h" -namespace mold::elf { +namespace mold { template ObjectFile *read_lto_object(Context &ctx, MappedFile *mf) { @@ -9,7 +9,7 @@ ObjectFile *read_lto_object(Context &ctx, MappedFile *mf) { } template -std::vector *> do_lto(Context &ctx) { +std::vector *> run_lto_plugin(Context &ctx) { return {}; } @@ -19,7 +19,7 @@ void lto_cleanup(Context &ctx) {} using E = MOLD_TARGET; template ObjectFile *read_lto_object(Context &, MappedFile *); -template std::vector *> do_lto(Context &); +template std::vector *> run_lto_plugin(Context &); template void lto_cleanup(Context &); -} // namespace mold::elf +} // namespace mold diff --git a/elf/lto.h b/src/lto.h similarity index 98% rename from elf/lto.h rename to src/lto.h index 5f2225d2..f1795534 100644 --- a/elf/lto.h +++ b/src/lto.h @@ -1,6 +1,6 @@ #pragma once -#include "../common/integers.h" +#include "../lib/integers.h" namespace mold { diff --git a/elf/main.cc b/src/main.cc similarity index 72% rename from elf/main.cc rename to src/main.cc index c3b1d929..ce94043c 100644 --- a/elf/main.cc +++ b/src/main.cc @@ -1,6 +1,6 @@ #include "mold.h" -#include 
"../common/archive-file.h" -#include "../common/output-file.h" +#include "filetype.h" +#include "../lib/archive-file.h" #include #include @@ -23,16 +23,17 @@ #ifdef MOLD_X86_64 int main(int argc, char **argv) { - return mold::elf::elf_main(argc, argv); + return mold::mold_main(argc, argv); } #endif -namespace mold::elf { +namespace mold { // Read the beginning of a given file and returns its machine type // (e.g. EM_X86_64 or EM_386). template -std::string_view get_machine_type(Context &ctx, MappedFile *mf) { +std::string_view +get_machine_type(Context &ctx, ReaderContext &rctx, MappedFile *mf) { auto get_elf_type = [&](u8 *buf) -> std::string_view { bool is_le = (((ElfEhdr *)buf)->e_ident[EI_DATA] == ELFDATA2LSB); bool is_64; @@ -73,8 +74,6 @@ std::string_view get_machine_type(Context &ctx, MappedFile *mf) { return M68K::target_name; case EM_SH: return SH4::target_name; - case EM_ALPHA: - return ALPHA::target_name; case EM_LOONGARCH: return is_64 ? LOONGARCH64::target_name : LOONGARCH32::target_name; default: @@ -100,7 +99,7 @@ std::string_view get_machine_type(Context &ctx, MappedFile *mf) { return get_elf_type(child->data); return ""; case FileType::TEXT: - return get_script_output_type(ctx, mf); + return Script(ctx, rctx, mf).get_script_output_type(); default: return ""; } @@ -108,33 +107,33 @@ std::string_view get_machine_type(Context &ctx, MappedFile *mf) { template static void -check_file_compatibility(Context &ctx, MappedFile *mf) { - std::string_view target = get_machine_type(ctx, mf); +check_file_compatibility(Context &ctx, ReaderContext &rctx, MappedFile *mf) { + std::string_view target = get_machine_type(ctx, rctx, mf); if (target != ctx.arg.emulation) Fatal(ctx) << mf->name << ": incompatible file type: " << ctx.arg.emulation << " is expected but got " << target; } template -static ObjectFile *new_object_file(Context &ctx, MappedFile *mf, - std::string archive_name) { +static ObjectFile *new_object_file(Context &ctx, ReaderContext &rctx, + MappedFile 
*mf, std::string archive_name) { static Counter count("parsed_objs"); count++; - check_file_compatibility(ctx, mf); + check_file_compatibility(ctx, rctx, mf); - bool in_lib = ctx.in_lib || (!archive_name.empty() && !ctx.whole_archive); + bool in_lib = rctx.in_lib || (!archive_name.empty() && !rctx.whole_archive); ObjectFile *file = ObjectFile::create(ctx, mf, archive_name, in_lib); file->priority = ctx.file_priority++; - ctx.tg.run([file, &ctx] { file->parse(ctx); }); + rctx.tg->run([file, &ctx] { file->parse(ctx); }); if (ctx.arg.trace) Out(ctx) << "trace: " << *file; return file; } template -static ObjectFile *new_lto_obj(Context &ctx, MappedFile *mf, - std::string archive_name) { +static ObjectFile *new_lto_obj(Context &ctx, ReaderContext &rctx, + MappedFile *mf, std::string archive_name) { static Counter count("parsed_lto_objs"); count++; @@ -144,7 +143,7 @@ static ObjectFile *new_lto_obj(Context &ctx, MappedFile *mf, ObjectFile *file = read_lto_object(ctx, mf); file->priority = ctx.file_priority++; file->archive_name = archive_name; - file->is_in_lib = ctx.in_lib || (!archive_name.empty() && !ctx.whole_archive); + file->is_in_lib = rctx.in_lib || (!archive_name.empty() && !rctx.whole_archive); file->is_alive = !file->is_in_lib; if (ctx.arg.trace) Out(ctx) << "trace: " << *file; @@ -153,40 +152,37 @@ static ObjectFile *new_lto_obj(Context &ctx, MappedFile *mf, template static SharedFile * -new_shared_file(Context &ctx, MappedFile *mf) { - check_file_compatibility(ctx, mf); +new_shared_file(Context &ctx, ReaderContext &rctx, MappedFile *mf) { + check_file_compatibility(ctx, rctx, mf); SharedFile *file = SharedFile::create(ctx, mf); file->priority = ctx.file_priority++; - ctx.tg.run([file, &ctx] { file->parse(ctx); }); + file->is_alive = !rctx.as_needed; + rctx.tg->run([file, &ctx] { file->parse(ctx); }); if (ctx.arg.trace) Out(ctx) << "trace: " << *file; return file; } template -void read_file(Context &ctx, MappedFile *mf) { - if (ctx.visited.contains(mf->name)) 
- return; - +void read_file(Context &ctx, ReaderContext &rctx, MappedFile *mf) { switch (get_file_type(ctx, mf)) { case FileType::ELF_OBJ: - ctx.objs.push_back(new_object_file(ctx, mf, "")); + ctx.objs.push_back(new_object_file(ctx, rctx, mf, "")); return; case FileType::ELF_DSO: - ctx.dsos.push_back(new_shared_file(ctx, mf)); - ctx.visited.insert(mf->name); + ctx.dsos.push_back(new_shared_file(ctx, rctx, mf)); return; case FileType::AR: case FileType::THIN_AR: for (MappedFile *child : read_archive_members(ctx, mf)) { switch (get_file_type(ctx, child)) { case FileType::ELF_OBJ: - ctx.objs.push_back(new_object_file(ctx, child, mf->name)); + ctx.objs.push_back(new_object_file(ctx, rctx, child, mf->name)); break; case FileType::GCC_LTO_OBJ: case FileType::LLVM_BITCODE: - if (ObjectFile *file = new_lto_obj(ctx, child, mf->name)) + if (ObjectFile *file = new_lto_obj(ctx, rctx, child, mf->name)) ctx.objs.push_back(file); break; case FileType::ELF_DSO: @@ -197,15 +193,13 @@ void read_file(Context &ctx, MappedFile *mf) { break; } } - if (!ctx.whole_archive) - ctx.visited.insert(mf->name); return; case FileType::TEXT: - parse_linker_script(ctx, mf); + Script(ctx, rctx, mf).parse_linker_script(); return; case FileType::GCC_LTO_OBJ: case FileType::LLVM_BITCODE: - if (ObjectFile *file = new_lto_obj(ctx, mf, "")) + if (ObjectFile *file = new_lto_obj(ctx, rctx, mf, "")) ctx.objs.push_back(file); return; default: @@ -215,33 +209,46 @@ void read_file(Context &ctx, MappedFile *mf) { template static std::string_view -detect_machine_type(Context &ctx, std::vector paths) { - std::erase(paths, "-"); - - for (const std::string &path : paths) - if (auto *mf = open_file(ctx, path)) - if (get_file_type(ctx, mf) != FileType::TEXT) - if (std::string_view target = get_machine_type(ctx, mf); - !target.empty()) - return target; - - for (const std::string &path : paths) - if (auto *mf = open_file(ctx, path)) - if (get_file_type(ctx, mf) == FileType::TEXT) - if (std::string_view target = 
get_script_output_type(ctx, mf); - !target.empty()) - return target; +detect_machine_type(Context &ctx, std::vector args) { + for (ReaderContext rctx; const std::string &arg : args) { + if (arg == "--Bstatic") { + rctx.static_ = true; + } else if (arg == "--Bdynamic") { + rctx.static_ = false; + } else if (!arg.starts_with('-')) { + if (MappedFile *mf = open_file(ctx, arg)) + if (get_file_type(ctx, mf) != FileType::TEXT) + if (std::string_view target = get_machine_type(ctx, rctx, mf); + !target.empty()) + return target; + } + } + + for (ReaderContext rctx; const std::string &arg : args) { + if (arg == "--Bstatic") { + rctx.static_ = true; + } else if (arg == "--Bdynamic") { + rctx.static_ = false; + } else if (!arg.starts_with('-')) { + if (MappedFile *mf = open_file(ctx, arg)) + if (get_file_type(ctx, mf) == FileType::TEXT) + if (std::string_view target = + Script(ctx, rctx, mf).get_script_output_type(); + !target.empty()) + return target; + } + } Fatal(ctx) << "-m option is missing"; } template -MappedFile *open_library(Context &ctx, std::string path) { +MappedFile *open_library(Context &ctx, ReaderContext &rctx, std::string path) { MappedFile *mf = open_file(ctx, path); if (!mf) return nullptr; - std::string_view target = get_machine_type(ctx, mf); + std::string_view target = get_machine_type(ctx, rctx, mf); if (!target.empty() && target != E::target_name) { Warn(ctx) << path << ": skipping incompatible file: " << target << " (e_machine " << (int)E::e_machine << ")"; @@ -251,11 +258,11 @@ MappedFile *open_library(Context &ctx, std::string path) { } template -MappedFile *find_library(Context &ctx, std::string name) { +MappedFile *find_library(Context &ctx, ReaderContext &rctx, std::string name) { if (name.starts_with(':')) { for (std::string_view dir : ctx.arg.library_paths) { std::string path = std::string(dir) + "/" + name.substr(1); - if (MappedFile *mf = open_library(ctx, path)) + if (MappedFile *mf = open_library(ctx, rctx, path)) return mf; } Fatal(ctx) << 
"library not found: " << name; @@ -263,94 +270,88 @@ MappedFile *find_library(Context &ctx, std::string name) { for (std::string_view dir : ctx.arg.library_paths) { std::string stem = std::string(dir) + "/lib" + name; - if (!ctx.is_static) - if (MappedFile *mf = open_library(ctx, stem + ".so")) + if (!rctx.static_) + if (MappedFile *mf = open_library(ctx, rctx, stem + ".so")) return mf; - if (MappedFile *mf = open_library(ctx, stem + ".a")) + if (MappedFile *mf = open_library(ctx, rctx, stem + ".a")) return mf; } Fatal(ctx) << "library not found: " << name; } -template -MappedFile *find_from_search_paths(Context &ctx, std::string name) { - if (MappedFile *mf = open_file(ctx, name)) - return mf; - - for (std::string_view dir : ctx.arg.library_paths) - if (MappedFile *mf = - open_file(ctx, std::string(dir) + "/" + name)) - return mf; - return nullptr; -} - template static void read_input_files(Context &ctx, std::span args) { Timer t(ctx, "read_input_files"); - std::vector> state; - ctx.is_static = ctx.arg.is_static; + ReaderContext rctx; + std::vector stack; + std::unordered_set visited; + + tbb::task_group tg; + rctx.tg = &tg; while (!args.empty()) { std::string_view arg = args[0]; args = args.subspan(1); if (arg == "--as-needed") { - ctx.as_needed = true; + rctx.as_needed = true; } else if (arg == "--no-as-needed") { - ctx.as_needed = false; + rctx.as_needed = false; } else if (arg == "--whole-archive") { - ctx.whole_archive = true; + rctx.whole_archive = true; } else if (arg == "--no-whole-archive") { - ctx.whole_archive = false; + rctx.whole_archive = false; } else if (arg == "--Bstatic") { - ctx.is_static = true; + rctx.static_ = true; } else if (arg == "--Bdynamic") { - ctx.is_static = false; + rctx.static_ = false; } else if (arg == "--start-lib") { - ctx.in_lib = true; + rctx.in_lib = true; } else if (arg == "--end-lib") { - ctx.in_lib = false; - } else if (remove_prefix(arg, "--version-script=")) { - MappedFile *mf = find_from_search_paths(ctx, 
std::string(arg)); - if (!mf) - Fatal(ctx) << "--version-script: file not found: " << arg; - parse_version_script(ctx, mf); + rctx.in_lib = false; } else if (arg == "--push-state") { - state.push_back({ctx.as_needed, ctx.whole_archive, ctx.is_static, - ctx.in_lib}); + stack.push_back(rctx); } else if (arg == "--pop-state") { - if (state.empty()) + if (stack.empty()) Fatal(ctx) << "no state pushed before popping"; - std::tie(ctx.as_needed, ctx.whole_archive, ctx.is_static, ctx.in_lib) = - state.back(); - state.pop_back(); - } else if (remove_prefix(arg, "-l")) { - MappedFile *mf = find_library(ctx, std::string(arg)); + rctx = stack.back(); + stack.pop_back(); + } else if (arg.starts_with("-l")) { + arg = arg.substr(2); + if (visited.contains(arg)) + continue; + visited.insert(arg); + + MappedFile *mf = find_library(ctx, rctx, std::string(arg)); mf->given_fullpath = false; - read_file(ctx, mf); + read_file(ctx, rctx, mf); } else { - read_file(ctx, must_open_file(ctx, std::string(arg))); + read_file(ctx, rctx, must_open_file(ctx, std::string(arg))); } } if (ctx.objs.empty()) Fatal(ctx) << "no input files"; - ctx.tg.wait(); + tg.wait(); +} + +template +static bool has_lto_obj(Context &ctx) { + for (ObjectFile *file : ctx.objs) + if (file->is_alive && (file->is_lto_obj || file->is_gcc_offload_obj)) + return true; + return false; } template -int elf_main(int argc, char **argv) { +int mold_main(int argc, char **argv) { Context ctx; // Process -run option first. process_run_subcommand() does not return. 
- if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) { -#if defined(_WIN32) || defined(__APPLE__) - Fatal(ctx) << "-run is supported only on Unix"; -#endif + if (argc >= 2 && (argv[1] == "-run"sv || argv[1] == "--run"sv)) process_run_subcommand(ctx, argc, argv); - } // Parse non-positional command line options ctx.cmdline_args = expand_response_files(ctx, argv); @@ -375,12 +376,8 @@ int elf_main(int argc, char **argv) { << ": " << errno_string(); // Fork a subprocess unless --no-fork is given. - std::function on_complete; - -#if !defined(_WIN32) && !defined(__APPLE__) if (ctx.arg.fork) - on_complete = fork_child(); -#endif + fork_child(); acquire_global_lock(); @@ -393,8 +390,8 @@ int elf_main(int argc, char **argv) { // Handle --retain-symbols-file options if any. if (ctx.arg.retain_symbols_file) - for (std::string_view name : *ctx.arg.retain_symbols_file) - get_symbol(ctx, name)->write_to_symtab = true; + for (Symbol *sym : *ctx.arg.retain_symbols_file) + sym->write_to_symtab = true; for (std::string_view arg : ctx.arg.trace_symbol) get_symbol(ctx, arg)->is_traced = true; @@ -419,26 +416,26 @@ int elf_main(int argc, char **argv) { if (!ctx.arg.relocatable) create_internal_file(ctx); - // resolve_symbols is 4 things in 1 phase: - // - // - Determine the set of object files to extract from archives. - // - Remove redundant COMDAT sections (e.g. duplicate inline functions). - // - Finally, the actual symbol resolution. - // - LTO, which requires preliminary symbol resolution before running - // and a follow-up re-resolution after the LTO objects are emitted. - // - // These passes have complex interactions, and unfortunately has to be - // put together in a single phase. + // Resolve symbols by choosing the most appropriate file for each + // symbol. This pass also removes redundant comdat sections (e.g. + // duplicate inline functions). resolve_symbols(ctx); - // "Kill" .eh_frame input sections after symbol resolution. 
- kill_eh_frame_sections(ctx); + // If there's an object file compiled with -flto, do link-time + // optimization. + if (has_lto_obj(ctx)) + do_lto(ctx); - // Split mergeable section contents into section pieces. - split_section_pieces(ctx); + // Now that we know which object files are to be included to the + // final output, we can remove unnecessary files. + std::erase_if(ctx.objs, [](InputFile *file) { return !file->is_alive; }); + std::erase_if(ctx.dsos, [](InputFile *file) { return !file->is_alive; }); - // Resolve mergeable section pieces to merge them. - resolve_section_pieces(ctx); + // Parse .eh_frame section contents. + parse_eh_frame_sections(ctx); + + // Split mergeable section contents into section pieces. + create_merged_sections(ctx); // Handle --relocatable. Since the linker's behavior is quite different // from the normal one when the option is given, the logic is implemented @@ -472,9 +469,6 @@ int elf_main(int argc, char **argv) { if (ctx.arg.icf) icf_sections(ctx); - // Compute sizes of sections containing mergeable strings. - compute_merged_section_sizes(ctx); - // Create linker-synthesized sections such as .got or .plt. create_synthetic_sections(ctx); @@ -482,6 +476,9 @@ int elf_main(int argc, char **argv) { if (!ctx.arg.allow_multiple_definition) check_duplicate_symbols(ctx); + if (!ctx.arg.allow_shlib_undefined) + check_shlib_undefined(ctx); + // Warn if symbols with different types are defined under the same name. check_symbol_types(ctx); @@ -491,6 +488,10 @@ int elf_main(int argc, char **argv) { // Bin input sections into output sections. create_output_sections(ctx); + // Convert an .ARM.exidx to a synthetic section. + if constexpr (is_arm32) + create_arm_exidx_section(ctx); + // Handle --section-align options. if (!ctx.arg.section_align.empty()) apply_section_align(ctx); @@ -573,14 +574,17 @@ int elf_main(int argc, char **argv) { // Compute the is_weak bit for each imported symbol. 
compute_imported_symbol_weakness(ctx); - // Compute sizes of output sections while assigning offsets - // within an output section to input sections. - compute_section_sizes(ctx); - // Sort sections by section attributes so that we'll have to // create as few segments as possible. sort_output_sections(ctx); + if (!ctx.arg.separate_debug_file.empty()) + separate_debug_sections(ctx); + + // Compute sizes of output sections while assigning offsets + // within an output section to input sections. + compute_section_sizes(ctx); + // If --packed_dyn_relocs=relr was given, base relocations are stored // to a .relr.dyn section in a compressed form. Construct a compressed // relocations now so that we can fix section sizes and file layout. @@ -590,7 +594,7 @@ int elf_main(int argc, char **argv) { // Reserve a space for dynamic symbol strings in .dynstr and sort // .dynsym contents if necessary. Beyond this point, no symbol will // be added to .dynsym. - ctx.dynsym->finalize(ctx); + sort_dynsyms(ctx); // Print reports about undefined symbols, if needed. if (ctx.arg.unresolved_symbols == UNRESOLVED_ERROR) @@ -604,7 +608,8 @@ int elf_main(int argc, char **argv) { ctx.verneed->construct(ctx); // Compute .symtab and .strtab sizes for each file. - create_output_symtab(ctx); + if (!ctx.arg.strip_all) + create_output_symtab(ctx); // .eh_frame is a special section from the linker's point of view, // as its contents are parsed and reconstructed by the linker, @@ -627,8 +632,17 @@ int elf_main(int argc, char **argv) { // that they can jump to anywhere in ±2 GiB by default. They may // be replaced with shorter instruction sequences if destinations // are close enough. Do this optimization. 
- if constexpr (is_riscv) - filesize = riscv_resize_sections(ctx); + if constexpr (is_riscv || is_loongarch) { + shrink_sections(ctx); + filesize = set_osec_offsets(ctx); + } + + if constexpr (is_arm32) { + if (ctx.extra.exidx) { + ctx.extra.exidx->remove_duplicate_entries(ctx); + filesize = set_osec_offsets(ctx); + } + } // At this point, memory layout is fixed. @@ -640,16 +654,17 @@ int elf_main(int argc, char **argv) { // If --compress-debug-sections is given, compress .debug_* sections // using zlib. - if (ctx.arg.compress_debug_sections != COMPRESS_NONE) - filesize = compress_debug_sections(ctx); + if (ctx.arg.compress_debug_sections != COMPRESS_NONE) { + compress_debug_sections(ctx); + filesize = set_osec_offsets(ctx); + } // At this point, both memory and file layouts are fixed. t_before_copy.stop(); // Create an output file - ctx.output_file = - OutputFile>::open(ctx, ctx.arg.output, filesize, 0777); + ctx.output_file = OutputFile::open(ctx, ctx.arg.output, filesize, 0777); ctx.buf = ctx.output_file->buf; Timer t_copy(ctx, "copy"); @@ -657,27 +672,28 @@ int elf_main(int argc, char **argv) { // Copy input sections to the output file and apply relocations. copy_chunks(ctx); - if (ctx.arg.z_rewrite_endbr) - rewrite_endbr(ctx); + if constexpr (is_x86_64) + if (ctx.arg.z_rewrite_endbr) + rewrite_endbr(ctx); // Dynamic linker works better with sorted .rela.dyn section, // so we sort them. ctx.reldyn->sort(ctx); - // Zero-clear paddings between sections - clear_padding(ctx); + // .note.gnu.build-id section contains a cryptographic hash of the + // entire output file. Now that we wrote everything except build-id, + // we can compute it. + if (ctx.buildid) + write_build_id(ctx); // .gdb_index's contents cannot be constructed before applying // relocations to other debug sections. We have relocated debug // sections now, so write the .gdb_index section. 
- if (ctx.gdb_index) + if (ctx.gdb_index && ctx.arg.separate_debug_file.empty()) write_gdb_index(ctx); - // .note.gnu.build-id section contains a cryptographic hash of the - // entire output file. Now that we wrote everything except build-id, - // we can compute it. - if (ctx.buildid) - ctx.buildid->write_buildid(ctx); + if (!ctx.arg.separate_debug_file.empty()) + write_gnu_debuglink(ctx); t_copy.stop(); ctx.checkpoint(); @@ -697,6 +713,9 @@ int elf_main(int argc, char **argv) { if (ctx.arg.print_map) print_map(ctx); + if (!ctx.arg.separate_debug_file.empty()) + write_separate_debug_file(ctx); + // Show stats numbers if (ctx.arg.stats) show_stats(ctx); @@ -707,9 +726,7 @@ int elf_main(int argc, char **argv) { std::cout << std::flush; std::cerr << std::flush; - if (on_complete) - on_complete(); - + notify_parent(); release_global_lock(); if (ctx.arg.quick_exit) @@ -723,6 +740,6 @@ int elf_main(int argc, char **argv) { using E = MOLD_TARGET; -template int elf_main(int, char **); +template int mold_main(int, char **); -} // namespace mold::elf +} // namespace mold diff --git a/elf/mapfile.cc b/src/mapfile.cc similarity index 98% rename from elf/mapfile.cc rename to src/mapfile.cc index 4e730dd9..8d60971b 100644 --- a/elf/mapfile.cc +++ b/src/mapfile.cc @@ -7,7 +7,7 @@ #include #include -namespace mold::elf { +namespace mold { template using Map = @@ -114,4 +114,4 @@ using E = MOLD_TARGET; template void print_map(Context &ctx); -} // namespace mold::elf +} // namespace mold diff --git a/elf/mold-wrapper.c b/src/mold-wrapper.c similarity index 98% rename from elf/mold-wrapper.c rename to src/mold-wrapper.c index 6dadf811..d63d4de1 100644 --- a/elf/mold-wrapper.c +++ b/src/mold-wrapper.c @@ -9,7 +9,7 @@ #include #include -#if !defined(__OpenBSD__) && !defined(__FreeBSD__) +#if __has_include() # include #endif diff --git a/elf/mold.h b/src/mold.h similarity index 88% rename from elf/mold.h rename to src/mold.h index 79946c60..322a0ea8 100644 --- a/elf/mold.h +++ 
b/src/mold.h @@ -1,7 +1,7 @@ #pragma once +#include "../lib/common.h" #include "elf.h" -#include "../common/common.h" #include #include @@ -34,7 +34,7 @@ # include #endif -namespace mold::elf { +namespace mold { template class InputFile; template class InputSection; @@ -47,6 +47,7 @@ template class Symbol; template struct CieRecord; template struct Context; template struct FdeRecord; +template class MergeableSection; template class RelocSection; template @@ -59,7 +60,7 @@ std::string get_mold_version(); // template -struct SectionFragment { +struct __attribute__((aligned(4))) SectionFragment { SectionFragment(MergedSection *sec, bool is_alive) : output_section(*sec), is_alive(is_alive) {} @@ -236,14 +237,14 @@ struct InputSectionExtras { std::vector thunk_refs; }; -template +template requires is_riscv || is_loongarch struct InputSectionExtras { std::vector r_deltas; }; // InputSection represents a section in an input object file. template -class InputSection { +class __attribute__((aligned(4))) InputSection { public: InputSection(Context &ctx, ObjectFile &file, i64 shndx); @@ -263,7 +264,7 @@ class InputSection { std::span> get_fdes() const; std::string_view get_func_name(Context &ctx, i64 offset) const; bool is_relr_reloc(Context &ctx, const ElfRel &rel) const; - bool is_killed_by_icf() const; + bool icf_removed() const; bool record_undef_error(Context &ctx, const ElfRel &rel); std::pair *, i64> @@ -312,8 +313,6 @@ class InputSection { private: void scan_pcrel(Context &ctx, Symbol &sym, const ElfRel &rel); void scan_absrel(Context &ctx, Symbol &sym, const ElfRel &rel); - void scan_dyn_absrel(Context &ctx, Symbol &sym, const ElfRel &rel); - void scan_toc_rel(Context &ctx, Symbol &sym, const ElfRel &rel); void scan_tlsdesc(Context &ctx, Symbol &sym); void check_tlsle(Context &ctx, Symbol &sym, const ElfRel &rel); @@ -323,8 +322,6 @@ class InputSection { void apply_toc_rel(Context &ctx, Symbol &sym, const ElfRel &rel, u8 *loc, u64 S, i64 A, u64 P, ElfRel **dynrel); - 
void copy_contents_riscv(Context &ctx, u8 *buf); - u64 get_thunk_addr(i64 idx); std::optional get_tombstone(Symbol &sym, SectionFragment *frag); @@ -334,19 +331,18 @@ class InputSection { // tls.cc // -template u64 get_tls_begin(Context &); -template u64 get_tp_addr(Context &); -template u64 get_dtp_addr(Context &); +template u64 get_tp_addr(const ElfPhdr &); +template u64 get_dtp_addr(const ElfPhdr &); // // output-chunks.cc // template -OutputSection *find_section(Context &ctx, u32 sh_type); +Chunk *find_chunk(Context &ctx, u32 sh_type); template -OutputSection *find_section(Context &ctx, std::string_view name); +Chunk *find_chunk(Context &ctx, std::string_view name); template u64 get_eflags(Context &ctx) { @@ -367,15 +363,16 @@ void write_pltgot_entry(Context &ctx, u8 *buf, Symbol &sym); // Chunk represents a contiguous region in an output file. template -class Chunk { +class __attribute__((aligned(4))) Chunk { public: virtual ~Chunk() = default; virtual bool is_header() { return false; } virtual OutputSection *to_osec() { return nullptr; } + virtual void compute_section_size(Context &ctx) {} virtual i64 get_reldyn_size(Context &ctx) const { return 0; } virtual void construct_relr(Context &ctx) {} virtual void copy_buf(Context &ctx) {} - virtual void write_to(Context &ctx, u8 *buf) { unreachable(); } + virtual void write_to(Context &ctx, u8 *buf, ElfRel *rel) { unreachable(); } virtual void update_shdr(Context &ctx) {} std::string_view name; @@ -468,6 +465,24 @@ class InterpSection : public Chunk { void copy_buf(Context &ctx) override; }; +enum AbsRelKind { + ABS_REL_NONE, + ABS_REL_BASEREL, + ABS_REL_RELR, + ABS_REL_IFUNC, + ABS_REL_DYNREL, +}; + +// Represents a word-size absolute relocation (e.g. 
R_X86_64_64) +template +struct AbsRel { + InputSection *isec = nullptr; + u64 offset = 0; + Symbol *sym = nullptr; + i64 addend = 0; + AbsRelKind kind = ABS_REL_NONE; +}; + // Sections template class OutputSection : public Chunk { @@ -478,18 +493,22 @@ class OutputSection : public Chunk { } OutputSection *to_osec() override { return this; } + void compute_section_size(Context &ctx) override; + i64 get_reldyn_size(Context &ctx) const override; void construct_relr(Context &ctx) override; void copy_buf(Context &ctx) override; - void write_to(Context &ctx, u8 *buf) override; + void write_to(Context &ctx, u8 *buf, ElfRel *rel) override; void compute_symtab_size(Context &ctx) override; void populate_symtab(Context &ctx) override; + void scan_abs_relocations(Context &ctx); void create_range_extension_thunks(Context &ctx); std::vector *> members; std::vector>> thunks; std::unique_ptr> reloc_sec; + std::vector> abs_rels; Atomic sh_flags; }; @@ -751,12 +770,10 @@ class DynsymSection : public Chunk { } void add_symbol(Context &ctx, Symbol *sym); - void finalize(Context &ctx); void update_shdr(Context &ctx) override; void copy_buf(Context &ctx) override; std::vector *> symbols; - bool finalized = false; }; template @@ -793,25 +810,30 @@ class GnuHashSection : public Chunk { i64 num_buckets = -1; i64 num_bloom = 1; + i64 num_exported = -1; }; template class MergedSection : public Chunk { public: static MergedSection * - get_instance(Context &ctx, std::string_view name, i64 type, i64 flags, - i64 entsize, i64 addralign); + get_instance(Context &ctx, std::string_view name, const ElfShdr &shdr); SectionFragment *insert(Context &ctx, std::string_view data, u64 hash, i64 p2align); - void assign_offsets(Context &ctx); + void resolve(Context &ctx); + void compute_section_size(Context &ctx) override; void copy_buf(Context &ctx) override; - void write_to(Context &ctx, u8 *buf) override; + void write_to(Context &ctx, u8 *buf, ElfRel *rel) override; void print_stats(Context &ctx); + 
std::vector *> members; + std::mutex mu; + ConcurrentMap> map; HyperLogLog estimator; + bool resolved = false; private: MergedSection(std::string_view name, i64 flags, i64 type, i64 entsize); @@ -949,9 +971,8 @@ class BuildIdSection : public Chunk { void update_shdr(Context &ctx) override; void copy_buf(Context &ctx) override; - void write_buildid(Context &ctx); - static constexpr i64 HEADER_SIZE = 16; + std::vector contents; }; template @@ -987,6 +1008,22 @@ class NotePropertySection : public Chunk { std::map properties; }; +template +class GnuDebuglinkSection : public Chunk { +public: + GnuDebuglinkSection() { + this->name = ".gnu_debuglink"; + this->shdr.sh_type = SHT_PROGBITS; + this->shdr.sh_addralign = 4; + } + + void update_shdr(Context &ctx) override; + void copy_buf(Context &ctx) override; + + std::string filename; + u32 crc32 = 0; +}; + template class GdbIndexSection : public Chunk { public: @@ -1056,6 +1093,82 @@ class ComdatGroupSection : public Chunk { std::vector *> members; }; +// +// output-file.cc +// + +template +class OutputFile { +public: + static std::unique_ptr> + open(Context &ctx, std::string path, i64 filesize, int perm); + + virtual void close(Context &ctx) = 0; + virtual ~OutputFile() = default; + + u8 *buf = nullptr; + std::vector buf2; + std::string path; + int fd = -1; + i64 filesize = 0; + bool is_mmapped = false; + bool is_unmapped = false; + +protected: + OutputFile(std::string path, i64 filesize, bool is_mmapped) + : path(path), filesize(filesize), is_mmapped(is_mmapped) {} +}; + +template +class MallocOutputFile : public OutputFile { +public: + MallocOutputFile(Context &ctx, std::string path, i64 filesize, int perm) + : OutputFile(path, filesize, false), ptr(new u8[filesize]), + perm(perm) { + this->buf = ptr.get(); + } + + void close(Context &ctx) override { + Timer t(ctx, "close_file"); + FILE *fp; + + if (this->path == "-") { + fp = stdout; + } else { +#ifdef _WIN32 + int pmode = (perm & 0200) ? 
(_S_IREAD | _S_IWRITE) : _S_IREAD; + i64 fd = _open(this->path.c_str(), _O_RDWR | _O_CREAT | _O_BINARY, pmode); +#else + i64 fd = ::open(this->path.c_str(), O_RDWR | O_CREAT, perm); +#endif + if (fd == -1) + Fatal(ctx) << "cannot open " << this->path << ": " << errno_string(); +#ifdef _WIN32 + fp = _fdopen(fd, "wb"); +#else + fp = fdopen(fd, "w"); +#endif + } + + fwrite(this->buf, this->filesize, 1, fp); + if (!this->buf2.empty()) + fwrite(this->buf2.data(), this->buf2.size(), 1, fp); + fclose(fp); + } + +private: + std::unique_ptr ptr; + int perm; +}; + +template +class LockingOutputFile : public OutputFile { +public: + LockingOutputFile(Context &ctx, std::string path, int perm); + void resize(Context &ctx, i64 filesize); + void close(Context &ctx) override; +}; + // // gdb-index.cc // @@ -1090,15 +1203,23 @@ struct ComdatGroupRef { }; template -struct MergeableSection { +class MergeableSection { +public: + MergeableSection(Context &ctx, MergedSection &parent, + std::unique_ptr> &isec); + + void split_contents(Context &ctx); + void resolve_contents(Context &ctx); std::pair *, i64> get_fragment(i64 offset); std::string_view get_contents(i64 idx); - MergedSection *parent; - std::string_view contents; + MergedSection &parent; + std::vector *> fragments; + +private: + std::unique_ptr> section; std::vector frag_offsets; std::vector hashes; - std::vector *> fragments; u8 p2align = 0; }; @@ -1191,8 +1312,9 @@ class ObjectFile : public InputFile { void parse(Context &ctx); void initialize_symbols(Context &ctx); - void initialize_mergeable_sections(Context &ctx); - void resolve_section_pieces(Context &ctx); + void parse_ehframe(Context &ctx); + void convert_mergeable_sections(Context &ctx); + void reattach_section_pieces(Context &ctx); void resolve_symbols(Context &ctx) override; void mark_live_objects(Context &ctx, std::function *)> feeder) override; @@ -1222,9 +1344,6 @@ class ObjectFile : public InputFile { bool is_gcc_offload_obj = false; bool is_rust_obj = false; - 
i64 num_dynrel = 0; - i64 reldyn_offset = 0; - i64 fde_idx = 0; i64 fde_offset = 0; i64 fde_size = 0; @@ -1251,7 +1370,6 @@ class ObjectFile : public InputFile { void sort_relocations(Context &ctx); void initialize_ehframe_sections(Context &ctx); void parse_note_gnu_property(Context &ctx, const ElfShdr &shdr); - void parse_ehframe(Context &ctx); void override_symbol(Context &ctx, Symbol &sym, const ElfSym &esym, i64 symidx); void merge_visibility(Context &ctx, Symbol &sym, u8 visibility); @@ -1272,6 +1390,7 @@ class SharedFile : public InputFile { void resolve_symbols(Context &ctx) override; std::span *> get_symbols_at(Symbol *sym); i64 get_alignment(Symbol *sym); + std::vector get_dt_needed(Context &ctx); bool is_readonly(Symbol *sym); void mark_live_objects(Context &ctx, @@ -1285,10 +1404,11 @@ class SharedFile : public InputFile { std::vector> elf_syms2; private: - SharedFile(Context &ctx, MappedFile *mf); + SharedFile(Context &ctx, MappedFile *mf) : InputFile(ctx, mf) {} std::string get_soname(Context &ctx); void maybe_override_symbol(Symbol &sym, const ElfSym &esym); + std::vector read_dt_needed(Context &ctx); std::vector read_verdef(Context &ctx); std::vector versyms; @@ -1303,15 +1423,13 @@ class SharedFile : public InputFile { // linker-script.cc // -template -void parse_linker_script(Context &ctx, MappedFile *mf); - -template -std::string_view -get_script_output_type(Context &ctx, MappedFile *mf); - -template -void parse_version_script(Context &ctx, MappedFile *mf); +struct ReaderContext { + bool as_needed = false; + bool in_lib = false; + bool static_ = false; + bool whole_archive = false; + tbb::task_group *tg = nullptr; +}; struct DynamicPattern { std::string_view pattern; @@ -1319,6 +1437,48 @@ struct DynamicPattern { bool is_cpp = false; }; +template +class Script { +public: + Script(Context &ctx, ReaderContext &rctx, MappedFile *mf) + : ctx(ctx), rctx(rctx), mf(mf) {} + + std::string_view get_script_output_type(); + void parse_linker_script(); + void 
parse_version_script(); + std::vector parse_dynamic_list(); + +private: + [[noreturn]] void error(std::string_view pos, std::string msg); + + void tokenize(); + + std::span + skip(std::span tok, std::string_view str); + + std::span read_output_format(std::span tok); + std::span read_group(std::span tok); + + std::span + read_version_script_commands(std::span tok, + std::string_view ver_str, u16 ver_idx, + bool is_global, bool is_cpp); + + std::span read_version_script(std::span tok); + + MappedFile *resolve_path(std::string_view tok, bool check_target); + + std::span + read_dynamic_list_commands(std::span tok, + std::vector &result, bool is_cpp); + + Context &ctx; + ReaderContext &rctx; + MappedFile *mf = mf; + std::once_flag once; + std::vector tokens; +}; + template std::vector parse_dynamic_list(Context &ctx, std::string_view path); @@ -1331,11 +1491,25 @@ template ObjectFile *read_lto_object(Context &ctx, MappedFile *mb); template -std::vector *> do_lto(Context &ctx); +std::vector *> run_lto_plugin(Context &ctx); template void lto_cleanup(Context &ctx); +// +// shrink-sections.cc +// + +template +void shrink_sections(Context &ctx); + +template +void shrink_section(Context &ctx, InputSection &isec, bool use_rvc); + +template +i64 compute_distance(Context &ctx, Symbol &sym, + InputSection &isec, const ElfRel &rel); + // // gc-sections.cc // @@ -1368,7 +1542,8 @@ void print_map(Context &ctx); // subprocess.cc // -std::function fork_child(); +void fork_child(); +void notify_parent(); template [[noreturn]] @@ -1394,11 +1569,10 @@ template void apply_exclude_libs(Context &); template void create_synthetic_sections(Context &); template void set_file_priority(Context &); template void resolve_symbols(Context &); -template void kill_eh_frame_sections(Context &); -template void split_section_pieces(Context &); -template void resolve_section_pieces(Context &); +template void do_lto(Context &); +template void parse_eh_frame_sections(Context &); +template void 
create_merged_sections(Context &); template void convert_common_symbols(Context &); -template void compute_merged_section_sizes(Context &); template void create_output_sections(Context &); template void add_synthetic_symbols(Context &); template void apply_section_align(Context &); @@ -1406,6 +1580,7 @@ template void check_cet_errors(Context &); template void print_dependencies(Context &); template void write_repro_file(Context &); template void check_duplicate_symbols(Context &); +template void check_shlib_undefined(Context &); template void check_symbol_types(Context &); template void sort_init_fini(Context &); template void sort_ctor_dtor(Context &); @@ -1417,29 +1592,58 @@ template void claim_unresolved_symbols(Context &); template void scan_relocations(Context &); template void compute_imported_symbol_weakness(Context &); template void construct_relr(Context &); +template void sort_dynsyms(Context &); template void create_output_symtab(Context &); template void report_undef_errors(Context &); template void create_reloc_sections(Context &); template void copy_chunks(Context &); -template void rewrite_endbr(Context &); template void apply_version_script(Context &); template void parse_symbol_version(Context &); template void compute_import_export(Context &); template void compute_address_significance(Context &); -template void clear_padding(Context &); +template void separate_debug_sections(Context &); template void compute_section_headers(Context &); template i64 set_osec_offsets(Context &); template void fix_synthetic_symbols(Context &); -template i64 compress_debug_sections(Context &); +template void compress_debug_sections(Context &); +template void write_build_id(Context &); +template void write_gnu_debuglink(Context &); +template void write_separate_debug_file(Context &ctx); template void write_dependency_file(Context &); template void show_stats(Context &); +// +// arch-x86-64.cc +// + +void rewrite_endbr(Context &ctx); + // // arch-arm32.cc // +class 
Arm32ExidxSection : public Chunk { +public: + Arm32ExidxSection(OutputSection &osec) : output_section(osec) { + this->name = ".ARM.exidx"; + this->shdr.sh_type = SHT_ARM_EXIDX; + this->shdr.sh_flags = SHF_ALLOC; + this->shdr.sh_addralign = 4; + } + + void compute_section_size(Context &ctx) override; + void update_shdr(Context &ctx) override; + void remove_duplicate_entries(Context &ctx); + void copy_buf(Context &ctx) override; + +private: + std::vector get_contents(Context &ctx); + + OutputSection &output_section; +}; + template <> u64 get_eflags(Context &ctx); -void fixup_arm_exidx_section(Context &ctx); +void create_arm_exidx_section(Context &ctx); // // arch-riscv.cc @@ -1462,9 +1666,6 @@ class RiscvAttributesSection : public Chunk { template u64 get_eflags(Context &ctx); -template -i64 riscv_resize_sections(Context &ctx); - // // arch-ppc64v1.cc // @@ -1512,60 +1713,23 @@ class PPC64SaveRestoreSection : public Chunk { template <> u64 get_eflags(Context &ctx); -// -// arch-sparc.cc -// - -class SparcTlsGetAddrSection : public Chunk { -public: - SparcTlsGetAddrSection() { - this->name = ".tls_get_addr"; - this->shdr.sh_type = SHT_PROGBITS; - this->shdr.sh_flags = SHF_ALLOC | SHF_EXECINSTR; - this->shdr.sh_addralign = 4; - this->shdr.sh_size = 24; - } - - void copy_buf(Context &ctx) override; -}; - -// -// arch-alpha.cc -// - -class AlphaGotSection : public Chunk { -public: - AlphaGotSection() { - this->name = ".alpha_got"; - this->is_relro = true; - this->shdr.sh_type = SHT_PROGBITS; - this->shdr.sh_flags = SHF_ALLOC | SHF_WRITE; - this->shdr.sh_addralign = 8; - } - - void add_symbol(Symbol &sym, i64 addend); - void finalize(); - u64 get_addr(Symbol &sym, i64 addend); - i64 get_reldyn_size(Context &ctx) const override; - void copy_buf(Context &ctx) override; - - struct Entry { - bool operator==(const Entry &) const = default; - Symbol *sym; - i64 addend; - }; - -private: - std::vector entries; - std::mutex mu; -}; - // // main.cc // struct BuildId { - i64 size() 
const; + i64 size() const { + switch (kind) { + case HEX: + return value.size(); + case HASH: + return hash_size; + case UUID: + return 16; + default: + unreachable(); + } + } enum { NONE, HEX, HASH, UUID } kind = NONE; std::vector value; @@ -1624,6 +1788,11 @@ struct SectionOrder { template struct ContextExtras {}; +template <> +struct ContextExtras { + Arm32ExidxSection *exidx = nullptr; +}; + template struct ContextExtras { RiscvAttributesSection *riscv_attributes = nullptr; @@ -1649,13 +1818,7 @@ struct ContextExtras { template <> struct ContextExtras { - SparcTlsGetAddrSection *tls_get_addr_sec = nullptr; - Symbol *tls_get_addr_sym = nullptr; -}; - -template <> -struct ContextExtras { - AlphaGotSection *got = nullptr; + Symbol *tls_get_addr = nullptr; }; // Context represents a context object for each invocation of the linker. @@ -1668,6 +1831,9 @@ struct Context { arg.entry = get_symbol(*this, "_start"); arg.fini = get_symbol(*this, "_fini"); arg.init = get_symbol(*this, "_init"); + + if constexpr (is_sparc) + extra.tls_get_addr = get_symbol(*this, "__tls_get_addr"); } Context(const Context &) = delete; @@ -1681,6 +1847,7 @@ struct Context { // Command-line arguments struct { + BsymbolicKind Bsymbolic = BSYMBOLIC_NONE; BuildId build_id; CetReportKind z_cet_report = CET_REPORT_NONE; CompressKind compress_debug_sections = COMPRESS_NONE; @@ -1691,14 +1858,16 @@ struct Context { Symbol *fini = nullptr; Symbol *init = nullptr; UnresolvedKind unresolved_symbols = UNRESOLVED_IGNORE; - BsymbolicKind Bsymbolic = BSYMBOLIC_NONE; bool allow_multiple_definition = false; + bool allow_shlib_undefined = true; bool apply_dynamic_relocs = true; bool color_diagnostics = false; bool default_symver = false; bool demangle = true; + bool detach = true; bool discard_all = false; bool discard_locals = false; + bool dynamic_list_data = false; bool eh_frame_hdr = true; bool emit_relocs = false; bool enable_new_dtags = true; @@ -1713,7 +1882,6 @@ struct Context { bool icf = false; bool 
icf_all = false; bool ignore_data_address_equality = false; - bool is_static = false; bool lto_pass2 = false; bool nmagic = false; bool noinhibit_exec = false; @@ -1735,6 +1903,7 @@ struct Context { bool rosegment = true; bool shared = false; bool start_stop = false; + bool static_ = false; bool stats = false; bool strip_all = false; bool strip_debug = false; @@ -1770,8 +1939,6 @@ struct Context { i64 spare_program_headers = 0; i64 thread_count = 0; i64 z_stack_size = 0; - u64 shuffle_sections_seed; - std::string_view emulation; std::optional unique; std::optional physical_image_base; std::string Map; @@ -1783,9 +1950,11 @@ struct Context { std::string package_metadata; std::string plugin; std::string rpaths; + std::string separate_debug_file; std::string soname; std::string sysroot; - std::unique_ptr> retain_symbols_file; + std::string_view emulation; + std::optional *>> retain_symbols_file; std::unordered_map section_align; std::unordered_map section_start; std::unordered_set ignore_ir_file; @@ -1801,25 +1970,19 @@ struct Context { std::vector exclude_libs; std::vector filter; std::vector trace_symbol; + u32 z_x86_64_isa_level = 0; u64 image_base = 0x200000; + u64 shuffle_sections_seed = 0; } arg; std::vector version_patterns; std::vector dynamic_list_patterns; i64 default_version = VER_NDX_UNSPECIFIED; i64 page_size = E::page_size; + bool has_error = false; // Reader context - bool as_needed = false; - bool whole_archive = false; - bool is_static; - bool in_lib = false; i64 file_priority = 10000; - MappedFile *script_file = nullptr; - std::unordered_set visited; - tbb::task_group tg; - - bool has_error = false; // Symbol table tbb::concurrent_hash_map, HashCmp> symbol_map; @@ -1850,7 +2013,7 @@ struct Context { std::vector> internal_esyms; // Output buffer - std::unique_ptr>> output_file; + std::unique_ptr> output_file; u8 *buf = nullptr; bool overwrite_output_file = true; @@ -1861,6 +2024,9 @@ struct Context { tbb::concurrent_hash_map *, std::vector> 
undef_errors; + // For --separate-debug-file + std::vector *> debug_chunks; + // Output chunks OutputEhdr *ehdr = nullptr; OutputShdr *shdr = nullptr; @@ -1876,6 +2042,7 @@ struct Context { DynstrSection *dynstr = nullptr; HashSection *hash = nullptr; GnuHashSection *gnu_hash = nullptr; + GnuDebuglinkSection *gnu_debuglink = nullptr; ShstrtabSection *shstrtab = nullptr; PltSection *plt = nullptr; PltGotSection *pltgot = nullptr; @@ -1895,6 +2062,7 @@ struct Context { NotePropertySection *note_property = nullptr; GdbIndexSection *gdb_index = nullptr; RelroPaddingSection *relro_padding = nullptr; + MergedSection *comment = nullptr; [[no_unique_address]] ContextExtras extra; @@ -1940,21 +2108,20 @@ struct Context { }; template -std::string_view get_machine_type(Context &ctx, MappedFile *mf); +std::string_view +get_machine_type(Context &ctx, ReaderContext &rctx, MappedFile *mf); template -MappedFile *open_library(Context &ctx, std::string path); +MappedFile *open_library(Context &ctx, ReaderContext &rctx, std::string path); template -MappedFile *find_library(Context &ctx, std::string path); +MappedFile *find_library(Context &ctx, ReaderContext &rctx, std::string path); template -void read_file(Context &ctx, MappedFile *mf); +void read_file(Context &ctx, ReaderContext &rctx, MappedFile *mf); template -int elf_main(int argc, char **argv); - -int main(int argc, char **argv); +int mold_main(int argc, char **argv); template std::ostream &operator<<(std::ostream &out, const InputFile &file); @@ -2089,6 +2256,10 @@ class Symbol { TAG_MASK = 0b11, }; + // We want to make sure there are enough number of unused bits in + // pointers referring to these structures. In particular, we need + // __attribute__((aligned(4))) for m68k on which int, long, float + // and double are aligned only to two byte boundaries. 
static_assert(alignof(InputSection) >= 4); static_assert(alignof(Chunk) >= 4); static_assert(alignof(SectionFragment) >= 4); @@ -2225,6 +2396,10 @@ class Symbol { bool has_copyrel : 1 = false; bool is_copyrel_readonly : 1 = false; + // For symbol resolution. This flag is used rarely. See a comment in + // resolve_symbols(). + bool skip_dso : 1 = false; + // For --gc-sections bool gc_root : 1 = false; @@ -2337,12 +2512,19 @@ InputSection::get_fragment(Context &ctx, const ElfRel &rel) { assert(!(shdr().sh_flags & SHF_ALLOC)); const ElfSym &esym = file.elf_syms[rel.r_sym]; + if (esym.is_abs() || esym.is_common() || esym.is_undef()) + return {nullptr, 0}; + + i64 shndx = file.get_shndx(esym); + std::unique_ptr> &m = file.mergeable_sections[shndx]; + if (!m) + return {nullptr, 0}; + if (esym.st_type == STT_SECTION) - if (std::unique_ptr> &m = - file.mergeable_sections[file.get_shndx(esym)]) - return m->get_fragment(esym.st_value + get_addend(*this, rel)); + return m->get_fragment(esym.st_value + get_addend(*this, rel)); - return {nullptr, 0}; + std::pair *, i64> p = m->get_fragment(esym.st_value); + return {p.first, p.second + get_addend(*this, rel)}; } template @@ -2377,31 +2559,31 @@ InputSection::get_tombstone(Symbol &sym, SectionFragment *frag) { if (!isec || isec->is_alive) return {}; - std::string_view s = name(); - if (!s.starts_with(".debug")) + std::string_view str = name(); + if (!str.starts_with(".debug")) return {}; // If the section was dead due to ICF, we don't want to emit debug // info for that section but want to set real values to .debug_line so // that users can set a breakpoint inside a merged section. - if (isec->is_killed_by_icf() && s == ".debug_line") + if (isec->icf_removed() && str == ".debug_line") return {}; // 0 is an invalid value in most debug info sections, so we use it // as a tombstone value. .debug_loc and .debug_ranges reserve 0 as - // the terminator marker, so we use 1 if that's the case. 
- return (s == ".debug_loc" || s == ".debug_ranges") ? 1 : 0; + // the terminator marker, so we use 1 if that's the case. + return (str == ".debug_loc" || str == ".debug_ranges") ? 1 : 0; } template -inline bool InputSection::is_killed_by_icf() const { +inline bool InputSection::icf_removed() const { return this->leader && this->leader != this; } template std::pair *, i64> MergeableSection::get_fragment(i64 offset) { - std::vector &vec = frag_offsets; + std::span vec = frag_offsets; auto it = std::upper_bound(vec.begin(), vec.end(), offset); i64 idx = it - 1 - vec.begin(); return {fragments[idx], offset - vec[idx]}; @@ -2411,8 +2593,8 @@ template std::string_view MergeableSection::get_contents(i64 i) { i64 cur = frag_offsets[i]; if (i == frag_offsets.size() - 1) - return contents.substr(cur); - return contents.substr(cur, frag_offsets[i + 1] - cur); + return section->contents.substr(cur); + return section->contents.substr(cur, frag_offsets[i + 1] - cur); } template @@ -2462,6 +2644,8 @@ inline i64 ObjectFile::get_shndx(const ElfSym &esym) { if (esym.st_shndx == SHN_XINDEX) return symtab_shndx_sec[&esym - &this->elf_syms[0]]; + if (esym.st_shndx >= SHN_LORESERVE) + return 0; return esym.st_shndx; } @@ -2504,7 +2688,7 @@ u64 Symbol::get_addr(Context &ctx, i64 flags) const { return value; // absolute symbol if (!isec->is_alive) { - if (isec->is_killed_by_icf()) + if (isec->icf_removed()) return isec->leader->get_addr() + value; if (isec->name() == ".eh_frame") { @@ -2898,4 +3082,13 @@ inline bool is_c_identifier(std::string_view s) { return true; } -} // namespace mold::elf +template +std::string_view save_string(Context &ctx, const std::string &str) { + u8 *buf = new u8[str.size() + 1]; + memcpy(buf, str.data(), str.size()); + buf[str.size()] = '\0'; + ctx.string_pool.push_back(std::unique_ptr(buf)); + return {(char *)buf, str.size()}; +} + +} // namespace mold diff --git a/elf/output-chunks.cc b/src/output-chunks.cc similarity index 86% rename from 
elf/output-chunks.cc rename to src/output-chunks.cc index 4337653d..149859ab 100644 --- a/elf/output-chunks.cc +++ b/src/output-chunks.cc @@ -1,7 +1,5 @@ #include "mold.h" - #include "config.h" -#include "blake3.h" #include #include @@ -11,7 +9,7 @@ #include #include -namespace mold::elf { +namespace mold { // The hash function for .hash. static u32 elf_hash(std::string_view name) { @@ -26,29 +24,19 @@ static u32 elf_hash(std::string_view name) { return h; } -// The hash function for .gnu.hash. -static u32 djb_hash(std::string_view name) { - u32 h = 5381; - for (u8 c : name) - h = (h << 5) + h + c; - return h; -} - template -OutputSection *find_section(Context &ctx, u32 sh_type) { +Chunk *find_chunk(Context &ctx, u32 sh_type) { for (Chunk *chunk : ctx.chunks) - if (OutputSection *osec = chunk->to_osec()) - if (osec->shdr.sh_type == sh_type) - return osec; + if (chunk->shdr.sh_type == sh_type) + return chunk; return nullptr; } template -OutputSection *find_section(Context &ctx, std::string_view name) { +Chunk *find_chunk(Context &ctx, std::string_view name) { for (Chunk *chunk : ctx.chunks) - if (OutputSection *osec = chunk->to_osec()) - if (osec->name == name) - return osec; + if (chunk->name == name) + return chunk; return nullptr; } @@ -165,10 +153,17 @@ static std::vector> create_phdr(Context &ctx) { phdr.p_type = type; phdr.p_flags = flags; phdr.p_align = chunk->shdr.sh_addralign; - phdr.p_offset = chunk->shdr.sh_offset; - if (chunk->shdr.sh_type != SHT_NOBITS) + if (chunk->shdr.sh_type == SHT_NOBITS) { + // p_offset indicates the in-file start offset and is not + // significant for segments with zero on-file size. We still want to + // keep it congruent with the virtual address modulo page size + // because some loaders (at least FreeBSD's) are picky about it. 
+ phdr.p_offset = chunk->shdr.sh_addr % ctx.page_size; + } else { + phdr.p_offset = chunk->shdr.sh_offset; phdr.p_filesz = chunk->shdr.sh_size; + } phdr.p_vaddr = chunk->shdr.sh_addr; phdr.p_paddr = chunk->shdr.sh_addr; @@ -277,6 +272,10 @@ static std::vector> create_phdr(Context &ctx) { if (ctx.eh_frame_hdr) define(PT_GNU_EH_FRAME, PF_R, ctx.eh_frame_hdr); + // Add PT_GNU_PROPERTY + if (Chunk *chunk = find_chunk(ctx, ".note.gnu.property")) + define(PT_GNU_PROPERTY, PF_R, chunk); + // Add PT_GNU_STACK, which is a marker segment that doesn't really // contain any segments. It controls executable bit of stack area. { @@ -303,8 +302,8 @@ static std::vector> create_phdr(Context &ctx) { // Create a PT_ARM_EDXIDX if constexpr (is_arm32) - if (OutputSection *osec = find_section(ctx, SHT_ARM_EXIDX)) - define(PT_ARM_EXIDX, PF_R, osec); + if (ctx.extra.exidx) + define(PT_ARM_EXIDX, PF_R, ctx.extra.exidx); // Create a PT_RISCV_ATTRIBUTES if constexpr (is_riscv) @@ -371,9 +370,14 @@ void OutputPhdr::update_shdr(Context &ctx) { phdrs = create_phdr(ctx); this->shdr.sh_size = phdrs.size() * sizeof(ElfPhdr); - ctx.tls_begin = get_tls_begin(ctx); - ctx.tp_addr = get_tp_addr(ctx); - ctx.dtp_addr = get_dtp_addr(ctx); + for (ElfPhdr &phdr : phdrs) { + if (phdr.p_type == PT_TLS) { + ctx.tls_begin = phdr.p_vaddr; + ctx.tp_addr = get_tp_addr(phdr); + ctx.dtp_addr = get_dtp_addr(phdr); + break; + } + } } template @@ -400,11 +404,6 @@ void RelDynSection::update_shdr(Context &ctx) { offset += chunk->get_reldyn_size(ctx) * sizeof(ElfRel); } - for (ObjectFile *file : ctx.objs) { - file->reldyn_offset = offset; - offset += file->num_dynrel * sizeof(ElfRel); - } - this->shdr.sh_size = offset; this->shdr.sh_link = ctx.dynsym->shndx; } @@ -478,7 +477,7 @@ void StrtabSection::update_shdr(Context &ctx) { // affect correctness of the program but helps disassembler to // disassemble machine code appropriately. 
if constexpr (is_arm32) - if (!ctx.arg.strip_all && !ctx.arg.retain_symbols_file) + if (!ctx.arg.strip_all) offset += sizeof("$a\0$t\0$d"); for (Chunk *chunk : ctx.chunks) { @@ -505,7 +504,7 @@ void StrtabSection::copy_buf(Context &ctx) { buf[0] = '\0'; if constexpr (is_arm32) - if (!ctx.arg.strip_all && !ctx.arg.retain_symbols_file) + if (!ctx.arg.strip_all) memcpy(buf + 1, "$a\0$t\0$d", 9); } @@ -739,19 +738,19 @@ static std::vector> create_dynamic_section(Context &ctx) { define(DT_STRSZ, ctx.dynstr->shdr.sh_size); } - if (find_section(ctx, SHT_INIT_ARRAY)) { + if (find_chunk(ctx, SHT_INIT_ARRAY)) { define(DT_INIT_ARRAY, ctx.__init_array_start->value); define(DT_INIT_ARRAYSZ, ctx.__init_array_end->value - ctx.__init_array_start->value); } - if (find_section(ctx, SHT_PREINIT_ARRAY)) { + if (find_chunk(ctx, SHT_PREINIT_ARRAY)) { define(DT_PREINIT_ARRAY, ctx.__preinit_array_start->value); define(DT_PREINIT_ARRAYSZ, ctx.__preinit_array_end->value - ctx.__preinit_array_start->value); } - if (find_section(ctx, SHT_FINI_ARRAY)) { + if (find_chunk(ctx, SHT_FINI_ARRAY)) { define(DT_FINI_ARRAY, ctx.__fini_array_start->value); define(DT_FINI_ARRAYSZ, ctx.__fini_array_end->value - ctx.__fini_array_start->value); @@ -853,7 +852,7 @@ static std::vector> create_dynamic_section(Context &ctx) { template void DynamicSection::update_shdr(Context &ctx) { - if (ctx.arg.is_static && !ctx.arg.pie) + if (ctx.arg.static_ && !ctx.arg.pie) return; this->shdr.sh_size = create_dynamic_section(ctx).size() * sizeof(Word); @@ -863,20 +862,104 @@ void DynamicSection::update_shdr(Context &ctx) { template void DynamicSection::copy_buf(Context &ctx) { std::vector> contents = create_dynamic_section(ctx); - assert(this->shdr.sh_size == contents.size() * sizeof(contents[0])); + assert(this->shdr.sh_size == contents.size() * sizeof(Word)); write_vector(ctx.buf + this->shdr.sh_offset, contents); } +template +static std::vector> split(std::vector &input, i64 unit) { + std::span span(input); + 
std::vector> vec; + + while (span.size() >= unit) { + vec.push_back(span.subspan(0, unit)); + span = span.subspan(unit); + } + if (!span.empty()) + vec.push_back(span); + return vec; +} + + +// Assign offsets to OutputSection members +template +void OutputSection::compute_section_size(Context &ctx) { + ElfShdr &shdr = this->shdr; + + // On most RISC systems, we need to create so-called "range extension + // thunks" to extend branch instructions reach, as their jump + // instructions' reach is limited. create_range_extension_thunks() + // computes the size of the section while inserting thunks. + if constexpr (needs_thunk) { + if ((shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) { + create_range_extension_thunks(ctx); + return; + } + } + + // Since one output section may contain millions of input sections, + // we first split input sections into groups and assign offsets to + // groups. + struct Group { + std::span *> members; + i64 size = 0; + i64 p2align = 0; + i64 offset = 0; + }; + + std::span *> mem = members; + std::vector groups; + constexpr i64 group_size = 10000; + + while (!mem.empty()) { + i64 sz = std::min(group_size, mem.size()); + groups.push_back({mem.subspan(0, sz)}); + mem = mem.subspan(sz); + } + + tbb::parallel_for_each(groups, [](Group &group) { + for (InputSection *isec : group.members) { + group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size; + group.p2align = std::max(group.p2align, isec->p2align); + } + }); + + shdr.sh_size = 0; + + for (i64 i = 0; i < groups.size(); i++) { + shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align); + groups[i].offset = shdr.sh_size; + shdr.sh_size += groups[i].size; + shdr.sh_addralign = std::max(shdr.sh_addralign, 1 << groups[i].p2align); + } + + // Assign offsets to input sections. 
+ tbb::parallel_for_each(groups, [](Group &group) { + i64 offset = group.offset; + for (InputSection *isec : group.members) { + offset = align_to(offset, 1 << isec->p2align); + isec->offset = offset; + offset += isec->sh_size; + } + }); +} + template void OutputSection::copy_buf(Context &ctx) { - if (this->shdr.sh_type != SHT_NOBITS) - write_to(ctx, ctx.buf + this->shdr.sh_offset); + if (this->shdr.sh_type != SHT_NOBITS) { + ElfRel *rel = nullptr; + if (ctx.reldyn) + rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + + this->reldyn_offset); + + write_to(ctx, ctx.buf + this->shdr.sh_offset, rel); + } } template -void OutputSection::write_to(Context &ctx, u8 *buf) { +void OutputSection::write_to(Context &ctx, u8 *buf, ElfRel *rel) { + // Copy section contents to an output file. tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) { - // Copy section contents to an output file. InputSection &isec = *members[i]; isec.write_to(ctx, buf + isec.offset); @@ -901,11 +984,46 @@ void OutputSection::write_to(Context &ctx, u8 *buf) { } }); + // Emit range extension thunks. if constexpr (needs_thunk) { tbb::parallel_for_each(thunks, [&](std::unique_ptr> &thunk) { thunk->copy_buf(ctx); }); } + + // Emit dynamic relocations. 
+ for (AbsRel &r : abs_rels) { + Word *loc = (Word *)(buf + r.isec->offset + r.offset); + u64 addr = this->shdr.sh_addr + r.isec->offset + r.offset; + Symbol &sym = *r.sym; + + switch (r.kind) { + case ABS_REL_NONE: + case ABS_REL_RELR: + *loc = sym.get_addr(ctx) + r.addend; + break; + case ABS_REL_BASEREL: { + u64 val = sym.get_addr(ctx) + r.addend; + *rel++ = ElfRel(addr, E::R_RELATIVE, 0, val); + if (ctx.arg.apply_dynamic_relocs) + *loc = val; + break; + } + case ABS_REL_IFUNC: + if constexpr (supports_ifunc) { + u64 val = sym.get_addr(ctx, NO_PLT) + r.addend; + *rel++ = ElfRel(addr, E::R_IRELATIVE, 0, val); + if (ctx.arg.apply_dynamic_relocs) + *loc = val; + } + break; + case ABS_REL_DYNREL: + *rel++ = ElfRel(addr, E::R_ABS, sym.get_dynsym_idx(ctx), r.addend); + if (ctx.arg.apply_dynamic_relocs) + *loc = r.addend; + break; + } + } } // .relr.dyn contains base relocations encoded in a space-efficient form. @@ -927,25 +1045,24 @@ void OutputSection::write_to(Context &ctx, u8 *buf) { // the .rel.dyn section). A bitmap has LSB 1. template static std::vector encode_relr(std::span pos) { + for (i64 i = 0; i < pos.size(); i++) { + assert(pos[i] % sizeof(Word) == 0); + assert(i == 0 || pos[i - 1] < pos[i]); + } + std::vector vec; i64 num_bits = E::is_64 ? 
63 : 31; i64 max_delta = sizeof(Word) * num_bits; for (i64 i = 0; i < pos.size();) { - assert(i == 0 || pos[i - 1] < pos[i]); - assert(pos[i] % sizeof(Word) == 0); - vec.push_back(pos[i]); u64 base = pos[i] + sizeof(Word); i++; for (;;) { u64 bits = 0; - for (; i < pos.size() && pos[i] - base < max_delta; i++) { - assert(pos[i - 1] < pos[i]); - assert(pos[i] % sizeof(Word) == 0); + for (; i < pos.size() && pos[i] - base < max_delta; i++) bits |= (u64)1 << ((pos[i] - base) / sizeof(Word)); - } if (!bits) break; @@ -958,35 +1075,92 @@ static std::vector encode_relr(std::span pos) { } template -void OutputSection::construct_relr(Context &ctx) { - if (!ctx.arg.pic) - return; - if (!(this->shdr.sh_flags & SHF_ALLOC)) - return; - if (this->shdr.sh_addralign % sizeof(Word)) - return; +static AbsRelKind get_abs_rel_kind(Context &ctx, Symbol &sym) { + if (sym.is_ifunc()) + return sym.is_pde_ifunc(ctx) ? ABS_REL_NONE : ABS_REL_IFUNC; - // Skip it if it is a text section because .text doesn't usually - // contain any dynamic relocations. - if (this->shdr.sh_flags & SHF_EXECINSTR) - return; + if (sym.is_absolute()) + return ABS_REL_NONE; - // Collect base relocations - std::vector> shards(members.size()); + // True if the symbol's address is in the output file. + if (!sym.is_imported || (sym.flags & NEEDS_CPLT) || (sym.flags & NEEDS_COPYREL)) + return ctx.arg.pic ? ABS_REL_BASEREL : ABS_REL_NONE; - tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) { - InputSection &isec = *members[i]; + return ABS_REL_DYNREL; +} - if (isec.shdr().sh_addralign % sizeof(Word) == 0) - for (const ElfRel &r : isec.get_rels(ctx)) - if (r.r_type == E::R_ABS && r.r_offset % sizeof(Word) == 0) - if (Symbol &sym = *isec.file.symbols[r.r_sym]; - !sym.is_ifunc() && !sym.is_absolute() && !sym.is_imported) - shards[i].push_back(isec.offset + r.r_offset); +// Scan word-size absolute relocations (e.g. R_X86_64_64). 
This is +// separated from scan_relocations() because only such relocations can +// be promoted to dynamic relocations. +template +void OutputSection::scan_abs_relocations(Context &ctx) { + std::vector>> shards(members.size()); + + // Collect all word-size absolute relocations + tbb::parallel_for((i64)0, (i64)members.size(), [&](i64 i) { + InputSection *isec = members[i]; + for (const ElfRel &r : isec->get_rels(ctx)) + if (r.r_type == E::R_ABS) + shards[i].push_back(AbsRel{isec, r.r_offset, isec->file.symbols[r.r_sym], + get_addend(*isec, r)}); }); - // Compress them - std::vector pos = flatten(shards); + abs_rels = flatten(shards); + + // We can sometimes avoid creating dynamic relocations in read-only + // sections by promoting symbols to canonical PLT or copy relocations. + if (!ctx.arg.pic && !(this->shdr.sh_flags & SHF_WRITE)) + for (AbsRel &r : abs_rels) + if (Symbol &sym = *r.sym; + sym.is_imported && !sym.is_absolute()) + sym.flags |= (sym.get_type() == STT_FUNC) ? NEEDS_CPLT : NEEDS_COPYREL; + + // Now we can compute whether they need to be promoted to dynamic + // relocations or not. + for (AbsRel &r : abs_rels) + r.kind = get_abs_rel_kind(ctx, *r.sym); + + // If we have a relocation against a read-only section, we need to + // set the DT_TEXTREL flag for the loader. + for (AbsRel &r : abs_rels) { + if (r.kind != ABS_REL_NONE && !(r.isec->shdr().sh_flags & SHF_WRITE)) { + if (ctx.arg.z_text) { + Error(ctx) << *r.isec << ": relocation at offset 0x" + << std::hex << r.offset << " against symbol `" + << *r.sym << "' can not be used; recompile with -fPIC"; + } else if (ctx.arg.warn_textrel) { + Warn(ctx) << *r.isec << ": relocation against symbol `" << *r.sym + << "' in read-only section"; + } + ctx.has_textrel = true; + } + } + + // If --pack-dyn-relocs=relr is enabled, base relocations are put into + // .relr.dyn. 
+ if (ctx.arg.pack_dyn_relocs_relr) + for (AbsRel &r : abs_rels) + if (r.kind == ABS_REL_BASEREL && + r.isec->shdr().sh_addralign % sizeof(Word) == 0 && + r.offset % sizeof(Word) == 0) + r.kind = ABS_REL_RELR; +} + +template +i64 OutputSection::get_reldyn_size(Context &ctx) const { + i64 n = 0; + for (const AbsRel &r : abs_rels) + if (r.kind != ABS_REL_NONE && r.kind != ABS_REL_RELR) + n++; + return n; +} + +template +void OutputSection::construct_relr(Context &ctx) { + std::vector pos; + for (const AbsRel &r : abs_rels) + if (r.kind == ABS_REL_RELR) + pos.push_back(r.isec->offset + r.offset); this->relr = encode_relr(pos); } @@ -1091,7 +1265,7 @@ void GotSection::add_tlsdesc_symbol(Context &ctx, Symbol *sym) { // statically-linked executable), we always relax TLSDESC relocations // so that no TLSDESC relocation exist at runtime. assert(supports_tlsdesc); - assert(!ctx.arg.is_static); + assert(!ctx.arg.static_); sym->set_tlsdesc_idx(ctx, this->shdr.sh_size / sizeof(Word)); this->shdr.sh_size += sizeof(Word) * 2; @@ -1262,11 +1436,11 @@ void GotSection::copy_buf(Context &ctx) { buf[0] = ctx.dynamic->shdr.sh_addr; // arm64 psABI doesn't say anything about GOT[0], but glibc/arm64's code - // path for -static-pie wrongly assumed that GOT[0] refers _DYNAMIC. + // path for -static-pie wrongly assumed that GOT[0] refers to _DYNAMIC. 
// // https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=43d06ed218fc8be5 if constexpr (is_arm64) - if (ctx.dynamic && ctx.arg.is_static && ctx.arg.pie) + if (ctx.dynamic && ctx.arg.static_ && ctx.arg.pie) buf[0] = ctx.dynamic->shdr.sh_addr; ElfRel *rel = (ElfRel *)(ctx.buf + ctx.reldyn->shdr.sh_offset + @@ -1307,13 +1481,10 @@ void GotSection::copy_buf(Context &ctx) { template void GotSection::construct_relr(Context &ctx) { - assert(ctx.arg.pack_dyn_relocs_relr); - std::vector pos; for (GotEntry &ent : get_got_entries(ctx)) if (ent.is_relr(ctx)) pos.push_back(ent.idx * sizeof(Word)); - this->relr = encode_relr(pos); } @@ -1600,9 +1771,6 @@ ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, if constexpr (is_ppc64v2) esym.ppc_local_entry = sym.esym().ppc_local_entry; - if constexpr (is_alpha) - esym.alpha_st_other = sym.esym().alpha_st_other; - auto get_st_shndx = [&](Symbol &sym) -> u32 { if (SectionFragment *frag = sym.get_frag()) if (frag->is_alive) @@ -1615,7 +1783,7 @@ ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, if (InputSection *isec = sym.get_input_section()) { if (isec->is_alive) return isec->output_section->shndx; - else if (isec->is_killed_by_icf()) + if (isec->icf_removed()) return isec->leader->output_section->shndx; } @@ -1623,10 +1791,14 @@ ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, }; i64 shndx = -1; + InputSection *isec = sym.get_input_section(); + if (sym.has_copyrel) { + // Symbol in .copyrel shndx = sym.is_copyrel_readonly ? 
ctx.copyrel_relro->shndx : ctx.copyrel->shndx; esym.st_value = sym.get_addr(ctx); } else if (sym.file->is_dso || sym.esym().is_undef()) { + // Undefined symbol in a DSO esym.st_shndx = SHN_UNDEF; esym.st_size = 0; if (sym.is_canonical) @@ -1639,7 +1811,7 @@ ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, // Section fragment shndx = frag->output_section.shndx; esym.st_value = sym.get_addr(ctx); - } else if (!sym.get_input_section()) { + } else if (!isec) { // Absolute symbol esym.st_shndx = SHN_ABS; esym.st_value = sym.get_addr(ctx); @@ -1653,7 +1825,22 @@ ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, esym.st_type = STT_FUNC; esym.st_visibility = sym.visibility; esym.st_value = sym.get_plt_addr(ctx); + } else if ((isec->shdr().sh_flags & SHF_MERGE) && + !(isec->shdr().sh_flags & SHF_ALLOC)) { + // Symbol in a mergeable non-SHF_ALLOC section, such as .debug_str + ObjectFile *file = (ObjectFile *)sym.file; + MergeableSection &m = + *file->mergeable_sections[file->get_shndx(sym.esym())]; + + SectionFragment *frag; + i64 frag_addend; + std::tie(frag, frag_addend) = m.get_fragment(sym.esym().st_value); + + shndx = m.parent.shndx; + esym.st_visibility = sym.visibility; + esym.st_value = frag->get_addr(ctx) + frag_addend; } else { + // Symbol in a regular section shndx = get_st_shndx(sym); esym.st_visibility = sym.visibility; esym.st_value = sym.get_addr(ctx, NO_PLT); @@ -1677,8 +1864,6 @@ ElfSym to_output_esym(Context &ctx, Symbol &sym, u32 st_name, template void DynsymSection::add_symbol(Context &ctx, Symbol *sym) { - assert(!finalized); - if (symbols.empty()) symbols.resize(1); @@ -1688,62 +1873,6 @@ void DynsymSection::add_symbol(Context &ctx, Symbol *sym) { } } -template -void DynsymSection::finalize(Context &ctx) { - Timer t(ctx, "DynsymSection::finalize"); - assert(!finalized); - finalized = true; - - if (symbols.empty()) - return; - - // Sort symbols. In any symtab, local symbols must precede global symbols. 
- auto first_global = std::stable_partition(symbols.begin() + 1, symbols.end(), - [&](Symbol *sym) { - return sym->is_local(ctx); - }); - - // We also place undefined symbols before defined symbols for .gnu.hash. - // Defined symbols are sorted by their hashes for .gnu.hash. - if (ctx.gnu_hash) { - // Count the number of exported symbols to compute the size of .gnu.hash. - i64 num_exported = 0; - for (i64 i = 1; i < symbols.size(); i++) - if (symbols[i]->is_exported) - num_exported++; - - u32 num_buckets = num_exported / ctx.gnu_hash->LOAD_FACTOR + 1; - ctx.gnu_hash->num_buckets = num_buckets; - - tbb::parallel_for_each(first_global, symbols.end(), [&](Symbol *sym) { - sym->set_djb_hash(ctx, djb_hash(sym->name())); - }); - - tbb::parallel_sort(first_global, symbols.end(), - [&](Symbol *a, Symbol *b) { - if (a->is_exported != b->is_exported) - return b->is_exported; - - return std::tuple(a->get_djb_hash(ctx) % num_buckets, a->name()) < - std::tuple(b->get_djb_hash(ctx) % num_buckets, b->name()); - }); - } - - // Compute .dynstr size - ctx.dynstr->dynsym_offset = ctx.dynstr->shdr.sh_size; - - tbb::enumerable_thread_specific size; - tbb::parallel_for((i64)1, (i64)symbols.size(), [&](i64 i) { - symbols[i]->set_dynsym_idx(ctx, i); - size.local() += symbols[i]->name().size() + 1; - }); - - ctx.dynstr->shdr.sh_size += size.combine(std::plus()); - - // ELF's symbol table sh_info holds the offset of the first global symbol. 
- this->shdr.sh_info = first_global - symbols.begin(); -} - template void DynsymSection::update_shdr(Context &ctx) { this->shdr.sh_link = ctx.dynstr->shndx; @@ -1802,34 +1931,20 @@ void HashSection::copy_buf(Context &ctx) { } } -template -static std::span *> get_exported_symbols(Context &ctx) { - std::span *> syms = ctx.dynsym->symbols; - auto it = std::partition_point(syms.begin() + 1, syms.end(), - [](Symbol *sym) { - return !sym->is_exported; - }); - return syms.subspan(it - syms.begin()); -} - template void GnuHashSection::update_shdr(Context &ctx) { if (ctx.dynsym->symbols.empty()) return; - this->shdr.sh_link = ctx.dynsym->shndx; - - i64 num_exported = get_exported_symbols(ctx).size(); - if (num_exported) { - // We allocate 12 bits for each symbol in the bloom filter. - i64 num_bits = num_exported * 12; - num_bloom = bit_ceil(num_bits / (sizeof(Word) * 8)); - } + // We allocate 12 bits for each symbol in the bloom filter. + num_bloom = bit_ceil((num_exported * 12) / (sizeof(Word) * 8)); this->shdr.sh_size = HEADER_SIZE; // Header this->shdr.sh_size += num_bloom * sizeof(Word); // Bloom filter this->shdr.sh_size += num_buckets * 4; // Hash buckets this->shdr.sh_size += num_exported * 4; // Hash values + + this->shdr.sh_link = ctx.dynsym->shndx; } template @@ -1837,12 +1952,15 @@ void GnuHashSection::copy_buf(Context &ctx) { u8 *base = ctx.buf + this->shdr.sh_offset; memset(base, 0, this->shdr.sh_size); - std::span *> syms = get_exported_symbols(ctx); - std::vector indices(syms.size()); - i64 exported_offset = ctx.dynsym->symbols.size() - syms.size(); + i64 first_exported = ctx.dynsym->symbols.size() - num_exported; + + std::span *> syms = ctx.dynsym->symbols; + syms = syms.subspan(first_exported); + + std::vector indices(num_exported); *(U32 *)base = num_buckets; - *(U32 *)(base + 4) = exported_offset; + *(U32 *)(base + 4) = first_exported; *(U32 *)(base + 8) = num_bloom; *(U32 *)(base + 12) = BLOOM_SHIFT; @@ -1865,7 +1983,7 @@ void 
GnuHashSection::copy_buf(Context &ctx) { for (i64 i = 0; i < syms.size(); i++) if (!buckets[indices[i]]) - buckets[indices[i]] = i + exported_offset; + buckets[indices[i]] = i + first_exported; // Write a hash table U32 *table = buckets + num_buckets; @@ -1915,15 +2033,26 @@ MergedSection::MergedSection(std::string_view name, i64 flags, i64 type, template MergedSection * MergedSection::get_instance(Context &ctx, std::string_view name, - i64 type, i64 flags, - i64 entsize, i64 addralign) { + const ElfShdr &shdr) { + if (!(shdr.sh_flags & SHF_MERGE)) + return nullptr; + + i64 addralign = std::max(1, shdr.sh_addralign); + i64 flags = shdr.sh_flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED; + + i64 entsize = shdr.sh_entsize; + if (entsize == 0) + entsize = (shdr.sh_flags & SHF_STRINGS) ? 1 : (i64)shdr.sh_addralign; + if (entsize == 0) + return nullptr; + name = get_merged_output_name(ctx, name, flags, entsize, addralign); - flags = flags & ~(u64)SHF_GROUP & ~(u64)SHF_COMPRESSED; auto find = [&]() -> MergedSection * { for (std::unique_ptr> &osec : ctx.merged_sections) if (name == osec->name && flags == osec->shdr.sh_flags && - type == osec->shdr.sh_type && entsize == osec->shdr.sh_entsize) + shdr.sh_type == osec->shdr.sh_type && + entsize == osec->shdr.sh_entsize) return osec.get(); return nullptr; }; @@ -1941,7 +2070,7 @@ MergedSection::get_instance(Context &ctx, std::string_view name, if (MergedSection *osec = find()) return osec; - MergedSection *osec = new MergedSection(name, flags, type, entsize); + MergedSection *osec = new MergedSection(name, flags, shdr.sh_type, entsize); ctx.merged_sections.emplace_back(osec); return osec; } @@ -1962,7 +2091,55 @@ MergedSection::insert(Context &ctx, std::string_view data, u64 hash, } template -void MergedSection::assign_offsets(Context &ctx) { +static std::string get_cmdline_args(Context &ctx) { + std::stringstream ss; + ss << ctx.cmdline_args[1]; + for (i64 i = 2; i < ctx.cmdline_args.size(); i++) + ss << " " << 
ctx.cmdline_args[i]; + return ss.str(); +} + +// Add strings to .comment +template +static void add_comment_strings(Context &ctx) { + auto add = [&](std::string str) { + std::string_view buf = save_string(ctx, str); + std::string_view data(buf.data(), buf.size() + 1); + ctx.comment->insert(ctx, data, hash_string(data), 0); + }; + + // Add an identification string to .comment. + add(get_mold_version()); + + // Embed command line arguments for debugging. + char *env = getenv("MOLD_DEBUG"); + if (env && env[0]) + add("mold command line: " + get_cmdline_args(ctx)); +} + +template +void MergedSection::resolve(Context &ctx) { + tbb::parallel_for_each(members, [&](MergeableSection *sec) { + sec->split_contents(ctx); + }); + + // We aim 2/3 occupation ratio + map.resize(estimator.get_cardinality() * 3 / 2); + + tbb::parallel_for_each(members, [&](MergeableSection *sec) { + sec->resolve_contents(ctx); + }); + + if (this == ctx.comment) + add_comment_strings(ctx); + resolved = true; +} + +template +void MergedSection::compute_section_size(Context &ctx) { + if (!resolved) + resolve(ctx); + std::vector sizes(map.NUM_SHARDS); Atomic alignment = 1; @@ -2014,11 +2191,11 @@ void MergedSection::assign_offsets(Context &ctx) { template void MergedSection::copy_buf(Context &ctx) { - write_to(ctx, ctx.buf + this->shdr.sh_offset); + write_to(ctx, ctx.buf + this->shdr.sh_offset, nullptr); } template -void MergedSection::write_to(Context &ctx, u8 *buf) { +void MergedSection::write_to(Context &ctx, u8 *buf, ElfRel *rel) { i64 shard_size = map.nbuckets / map.NUM_SHARDS; tbb::parallel_for((i64)0, map.NUM_SHARDS, [&](i64 i) { @@ -2051,14 +2228,6 @@ template void EhFrameSection::construct(Context &ctx) { Timer t(ctx, "eh_frame"); - // If .eh_frame is missing in all input files, we don't want to - // create an output .eh_frame section. 
- if (std::all_of(ctx.objs.begin(), ctx.objs.end(), - [](ObjectFile *file) { return file->cies.empty(); })) { - this->shdr.sh_size = 0; - return; - } - // Remove dead FDEs and assign them offsets within their corresponding // CIE group. tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { @@ -2291,6 +2460,16 @@ void CopyrelSection::add_symbol(Context &ctx, Symbol *sym) { assert(!ctx.arg.shared); assert(sym->file->is_dso); + if (sym->esym().st_visibility == STV_PROTECTED) + Error(ctx) << *sym->file + << ": cannot create a copy relocation for protected symbol '" + << *sym << "'; recompile with -fPIC"; + + if (!ctx.arg.z_copyreloc) + Error(ctx) << "-z nocopyreloc: " << *sym->file + << ": cannot create a copy relocation for symbol '" << *sym + << "'; recompile with -fPIC"; + symbols.push_back(sym); SharedFile &file = *(SharedFile *)sym->file; @@ -2347,8 +2526,8 @@ void VersymSection::copy_buf(Context &ctx) { // // .relr.dyn is relatively new feature and not supported by glibc until // 2.38 which was released in 2022. If we don't do anything, executables -// built with `-z pack-relative-relocs` 't work and would crash -// immediately on startup with an older version of glibc. +// built with `-z pack-relative-relocs` would just crash immediately on +// startup with an older version of glibc. 
// // As a workaround, we'll add a dependency to a dummy version name // "GLIBC_ABI_DT_RELR" if `-z pack-relative-relocs` is given so that @@ -2526,89 +2705,21 @@ void VerdefSection::copy_buf(Context &ctx) { write_vector(ctx.buf + this->shdr.sh_offset, contents); } -inline i64 BuildId::size() const { - switch (kind) { - case HEX: - return value.size(); - case HASH: - return hash_size; - case UUID: - return 16; - default: - unreachable(); - } -} - template void BuildIdSection::update_shdr(Context &ctx) { - this->shdr.sh_size = HEADER_SIZE + ctx.arg.build_id.size(); + this->shdr.sh_size = ctx.arg.build_id.size() + 16; // +16 for the header } template void BuildIdSection::copy_buf(Context &ctx) { U32 *base = (U32 *)(ctx.buf + this->shdr.sh_offset); memset(base, 0, this->shdr.sh_size); - base[0] = 4; // Name size - base[1] = ctx.arg.build_id.size(); // Hash size - base[2] = NT_GNU_BUILD_ID; // Type - memcpy(base + 3, "GNU", 4); // Name string -} -// BLAKE3 is a cryptographic hash function just like SHA256. -// We use it instead of SHA256 because it's faster. -static void blake3_hash(u8 *buf, i64 size, u8 *out) { - blake3_hasher hasher; - blake3_hasher_init(&hasher); - blake3_hasher_update(&hasher, buf, size); - blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN); -} - -template -void BuildIdSection::write_buildid(Context &ctx) { - Timer t(ctx, "build_id"); - u8 *buf = ctx.buf + this->shdr.sh_offset + HEADER_SIZE; - - switch (ctx.arg.build_id.kind) { - case BuildId::HEX: - write_vector(buf, ctx.arg.build_id.value); - return; - case BuildId::HASH: { - i64 shard_size = 4 * 1024 * 1024; - i64 filesize = ctx.output_file->filesize; - i64 num_shards = align_to(filesize, shard_size) / shard_size; - std::vector shards(num_shards * BLAKE3_OUT_LEN); - - tbb::parallel_for((i64)0, num_shards, [&](i64 i) { - u8 *begin = ctx.buf + shard_size * i; - u8 *end = (i == num_shards - 1) ? 
ctx.buf + filesize : begin + shard_size; - blake3_hash(begin, end - begin, shards.data() + i * BLAKE3_OUT_LEN); - -#ifdef HAVE_MADVISE - // Make the kernel page out the file contents we've just written - // so that subsequent close(2) call will become quicker. - if (i > 0 && ctx.output_file->is_mmapped) - madvise(begin, end - begin, MADV_DONTNEED); -#endif - }); - - u8 digest[BLAKE3_OUT_LEN]; - blake3_hash(shards.data(), shards.size(), digest); - - assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN); - memcpy(buf, digest, ctx.arg.build_id.size()); - return; - } - case BuildId::UUID: { - get_random_bytes(buf, 16); - - // Indicate that this is UUIDv4 as defined by RFC4122 - buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000; - buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000; - return; - } - default: - unreachable(); - } + base[0] = 4; // Name size + base[1] = ctx.arg.build_id.size(); // Hash size + base[2] = NT_GNU_BUILD_ID; // Type + memcpy(base + 3, "GNU", 4); // Name string + write_vector(base + 4, contents); // Build ID } template @@ -2692,6 +2803,8 @@ void NotePropertySection::update_shdr(Context &ctx) { if (ctx.arg.z_shstk) properties[GNU_PROPERTY_X86_FEATURE_1_AND] |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; + properties[GNU_PROPERTY_X86_ISA_1_NEEDED] |= ctx.arg.z_x86_64_isa_level; + std::erase_if(properties, [](std::pair kv) { return kv.second == 0; }); @@ -2730,7 +2843,7 @@ CompressedSection::CompressedSection(Context &ctx, Chunk &chunk) { this->uncompressed_data.resize(chunk.shdr.sh_size); u8 *buf = this->uncompressed_data.data(); - chunk.write_to(ctx, buf); + chunk.write_to(ctx, buf, nullptr); switch (ctx.arg.compress_debug_sections) { case COMPRESS_ZLIB: @@ -2846,10 +2959,6 @@ void RelocSection::copy_buf(Context &ctx) { i64 addend; std::tie(symidx, addend) = get_symidx_addend(isec, rel); - if constexpr (is_alpha) - if (rel.r_type == R_ALPHA_GPDISP || rel.r_type == R_ALPHA_LITUSE) - addend = rel.r_addend; - i64 r_offset = isec.output_section->shdr.sh_addr + isec.offset + 
rel.r_offset; out = ElfRel(r_offset, rel.r_type, symidx, addend); @@ -2888,6 +2997,20 @@ void ComdatGroupSection::copy_buf(Context &ctx) { *buf++ = chunk->shndx; } +template +void GnuDebuglinkSection::update_shdr(Context &ctx) { + filename = std::filesystem::path(ctx.arg.separate_debug_file).filename().string(); + this->shdr.sh_size = align_to(filename.size() + 1, 4) + 4; +} + +template +void GnuDebuglinkSection::copy_buf(Context &ctx) { + u8 *buf = ctx.buf + this->shdr.sh_offset; + memset(buf, 0, this->shdr.sh_size); + write_string(buf, filename); + *(U32 *)(buf + this->shdr.sh_size - 4) = crc32; +} + using E = MOLD_TARGET; template class Chunk; @@ -2926,10 +3049,11 @@ template class GdbIndexSection; template class CompressedSection; template class RelocSection; template class ComdatGroupSection; +template class GnuDebuglinkSection; -template OutputSection *find_section(Context &, u32); -template OutputSection *find_section(Context &, std::string_view); +template Chunk *find_chunk(Context &, u32); +template Chunk *find_chunk(Context &, std::string_view); template i64 to_phdr_flags(Context &ctx, Chunk *chunk); template ElfSym to_output_esym(Context &, Symbol &, u32, U32 *); -} // namespace mold::elf +} // namespace mold diff --git a/src/output-file-unix.cc b/src/output-file-unix.cc new file mode 100644 index 00000000..0a6f9eb2 --- /dev/null +++ b/src/output-file-unix.cc @@ -0,0 +1,200 @@ +#include "mold.h" + +#include +#include +#include +#include +#include +#include + +namespace mold { + +static u32 get_umask() { + u32 orig_umask = umask(0); + umask(orig_umask); + return orig_umask; +} + +template +static int +open_or_create_file(Context &ctx, std::string path, std::string tmpfile, + int perm) { + // Reuse an existing file if exists and writable because on Linux, + // writing to an existing file is much faster than creating a fresh + // file and writing to it. 
+ if (ctx.overwrite_output_file && rename(path.c_str(), tmpfile.c_str()) == 0) { + i64 fd = ::open(tmpfile.c_str(), O_RDWR | O_CREAT, perm); + if (fd != -1) + return fd; + unlink(tmpfile.c_str()); + } + + i64 fd = ::open(tmpfile.c_str(), O_RDWR | O_CREAT, perm); + if (fd == -1) + Fatal(ctx) << "cannot open " << tmpfile << ": " << errno_string(); + return fd; +} + +template +class MemoryMappedOutputFile : public OutputFile { +public: + MemoryMappedOutputFile(Context &ctx, std::string path, i64 filesize, int perm) + : OutputFile(path, filesize, true) { + std::filesystem::path dir = filepath(path).parent_path(); + std::string filename = filepath(path).filename().string(); + std::string tmpfile = dir / ("." + filename + "." + std::to_string(getpid())); + + this->fd = open_or_create_file(ctx, path, tmpfile, perm); + + if (fchmod(this->fd, perm & ~get_umask()) == -1) + Fatal(ctx) << "fchmod failed: " << errno_string(); + + if (ftruncate(this->fd, filesize) == -1) + Fatal(ctx) << "ftruncate failed: " << errno_string(); + + output_tmpfile = (char *)save_string(ctx, tmpfile).data(); + +#ifdef __linux__ + fallocate(this->fd, 0, 0, filesize); +#endif + + this->buf = (u8 *)mmap(nullptr, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, this->fd, 0); + if (this->buf == MAP_FAILED) + Fatal(ctx) << path << ": mmap failed: " << errno_string(); + + mold::output_buffer_start = this->buf; + mold::output_buffer_end = this->buf + filesize; + } + + ~MemoryMappedOutputFile() { + if (fd2 != -1) + ::close(fd2); + } + + void close(Context &ctx) override { + Timer t(ctx, "close_file"); + + if (!this->is_unmapped) + munmap(this->buf, this->filesize); + + if (this->buf2.empty()) { + ::close(this->fd); + } else { + FILE *out = fdopen(this->fd, "w"); + fseek(out, 0, SEEK_END); + fwrite(&this->buf2[0], this->buf2.size(), 1, out); + fclose(out); + } + + // If an output file already exists, open a file and then remove it. 
+ // This is the fastest way to unlink a file, as it does not make the + // system to immediately release disk blocks occupied by the file. + fd2 = ::open(this->path.c_str(), O_RDONLY); + if (fd2 != -1) + unlink(this->path.c_str()); + + if (rename(output_tmpfile, this->path.c_str()) == -1) + Fatal(ctx) << this->path << ": rename failed: " << errno_string(); + output_tmpfile = nullptr; + } + +private: + int fd2 = -1; +}; + +template +std::unique_ptr> +OutputFile::open(Context &ctx, std::string path, i64 filesize, int perm) { + Timer t(ctx, "open_file"); + + if (path.starts_with('/') && !ctx.arg.chroot.empty()) + path = ctx.arg.chroot + "/" + path_clean(path); + + bool is_special = false; + if (path == "-") { + is_special = true; + } else { + struct stat st; + if (stat(path.c_str(), &st) == 0 && (st.st_mode & S_IFMT) != S_IFREG) + is_special = true; + } + + OutputFile *file; + if (is_special) + file = new MallocOutputFile(ctx, path, filesize, perm); + else + file = new MemoryMappedOutputFile(ctx, path, filesize, perm); + +#ifdef MADV_HUGEPAGE + // Enable transparent huge page for an output memory-mapped file. + // On Linux, it has an effect only on tmpfs mounted with `huge=advise`, + // but it can make the linker ~10% faster. You can try it by creating + // a tmpfs with the following commands + // + // $ mkdir tmp + // $ sudo mount -t tmpfs -o size=2G,huge=advise none tmp + // + // and then specifying a path under the directory as an output file. + madvise(file->buf, filesize, MADV_HUGEPAGE); +#endif + + if (ctx.arg.filler != -1) + memset(file->buf, ctx.arg.filler, filesize); + return std::unique_ptr(file); +} + +// LockingOutputFile is similar to MemoryMappedOutputFile, but it doesn't +// rename output files and instead acquires file lock using flock(). 
+template +LockingOutputFile::LockingOutputFile(Context &ctx, std::string path, + int perm) + : OutputFile(path, 0, true) { + this->fd = ::open(path.c_str(), O_RDWR | O_CREAT, perm); + if (this->fd == -1) + Fatal(ctx) << "cannot open " << path << ": " << errno_string(); + flock(this->fd, LOCK_EX); + + // We may be overwriting to an existing debug info file. We want to + // make the file unusable so that gdb won't use it by accident until + // it's ready. + u8 buf[256] = {}; + (void)!!write(this->fd, buf, sizeof(buf)); +} + +template +void LockingOutputFile::resize(Context &ctx, i64 filesize) { + if (ftruncate(this->fd, filesize) == -1) + Fatal(ctx) << "ftruncate failed: " << errno_string(); + + this->buf = (u8 *)mmap(nullptr, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, this->fd, 0); + if (this->buf == MAP_FAILED) + Fatal(ctx) << this->path << ": mmap failed: " << errno_string(); + + this->filesize = filesize; + mold::output_buffer_start = this->buf; + mold::output_buffer_end = this->buf + filesize; +} + +template +void LockingOutputFile::close(Context &ctx) { + if (!this->is_unmapped) + munmap(this->buf, this->filesize); + + if (!this->buf2.empty()) { + FILE *out = fdopen(this->fd, "w"); + fseek(out, 0, SEEK_END); + fwrite(&this->buf2[0], this->buf2.size(), 1, out); + fclose(out); + } + + ::close(this->fd); +} + +using E = MOLD_TARGET; + +template class OutputFile; +template class LockingOutputFile; + +} // namespace mold diff --git a/src/output-file-win32.cc b/src/output-file-win32.cc new file mode 100644 index 00000000..68bd26c8 --- /dev/null +++ b/src/output-file-win32.cc @@ -0,0 +1,118 @@ +#include "mold.h" + +#include +#include +#include + +namespace mold { + +template +class MemoryMappedOutputFile : public OutputFile { +public: + MemoryMappedOutputFile(Context &ctx, std::string path, i64 filesize, int perm) + : OutputFile(path, filesize, true) { + // TODO: use intermediate temporary file for output. + DWORD attrs = (perm & 0200) ? 
FILE_ATTRIBUTE_NORMAL : FILE_ATTRIBUTE_READONLY; + + handle = CreateFileA(path.c_str(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, CREATE_ALWAYS, attrs, nullptr); + if (handle == INVALID_HANDLE_VALUE) + Fatal(ctx) << "cannot open " << path << ": " << GetLastError(); + + HANDLE map = CreateFileMapping(handle, nullptr, PAGE_READWRITE, 0, + filesize, nullptr); + if (!map) + Fatal(ctx) << path << ": CreateFileMapping failed: " << GetLastError(); + + this->buf = (u8 *)MapViewOfFile(map, FILE_MAP_WRITE, 0, 0, filesize); + if (!this->buf) + Fatal(ctx) << path << ": MapViewOfFile failed: " << GetLastError(); + + CloseHandle(map); + + mold::output_buffer_start = this->buf; + mold::output_buffer_end = this->buf + filesize; + } + + ~MemoryMappedOutputFile() { + if (handle != INVALID_HANDLE_VALUE) + CloseHandle(handle); + } + + void close(Context &ctx) override { + Timer t(ctx, "close_file"); + + UnmapViewOfFile(this->buf); + + if (!this->buf2.empty()) { + if (SetFilePointer(handle, 0, nullptr, FILE_END) == INVALID_SET_FILE_POINTER) + Fatal(ctx) << this->path << ": SetFilePointer failed: " + << GetLastError(); + + DWORD written; + if (!WriteFile(handle, this->buf2.data(), this->buf2.size(), &written, + nullptr)) + Fatal(ctx) << this->path << ": WriteFile failed: " << GetLastError(); + } + + CloseHandle(handle); + handle = INVALID_HANDLE_VALUE; + } + +private: + HANDLE handle; +}; + +template +std::unique_ptr> +OutputFile::open(Context &ctx, std::string path, i64 filesize, int perm) { + Timer t(ctx, "open_file"); + + if (path.starts_with('/') && !ctx.arg.chroot.empty()) + path = ctx.arg.chroot + "/" + path_clean(path); + + bool is_special = false; + if (path == "-") { + is_special = true; + } else { + HANDLE h = CreateFileA(path.c_str(), GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); + if (h != INVALID_HANDLE_VALUE) { + if (GetFileType(h) 
!= FILE_TYPE_DISK) + is_special = true; + CloseHandle(h); + } + } + + OutputFile *file; + if (is_special) + file = new MallocOutputFile(ctx, path, filesize, perm); + else + file = new MemoryMappedOutputFile(ctx, path, filesize, perm); + + if (ctx.arg.filler != -1) + memset(file->buf, ctx.arg.filler, filesize); + return std::unique_ptr>(file); +} + +template +LockingOutputFile::LockingOutputFile(Context &ctx, std::string path, + int perm) + : OutputFile(path, 0, true) { + Fatal(ctx) << "LockingOutputFile is not supported on Windows"; +} + +template +void LockingOutputFile::resize(Context &ctx, i64 filesize) {} + +template +void LockingOutputFile::close(Context &ctx) {} + +using E = MOLD_TARGET; + +template class OutputFile; +template class LockingOutputFile; + +} // namespace mold diff --git a/elf/passes.cc b/src/passes.cc similarity index 83% rename from elf/passes.cc rename to src/passes.cc index 49fa569f..807bb2bc 100644 --- a/elf/passes.cc +++ b/src/passes.cc @@ -1,4 +1,5 @@ #include "mold.h" +#include "blake3.h" #include #include @@ -11,49 +12,47 @@ #include #include -namespace mold::elf { +namespace mold { -// Since elf_main is a template, we can't run it without a type parameter. -// We speculatively run elf_main with X86_64, and if the speculation was +// Since mold_main is a template, we can't run it without a type parameter. +// We speculatively run mold_main with X86_64, and if the speculation was // wrong, re-run it with an actual machine type. 
template int redo_main(Context &ctx, int argc, char **argv) { std::string_view target = ctx.arg.emulation; if (target == I386::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == ARM64::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == ARM32::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == RV64LE::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == RV64BE::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == RV32LE::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == RV32BE::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == PPC32::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == PPC64V1::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == PPC64V2::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == S390X::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == SPARC64::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == M68K::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == SH4::target_name) - return elf_main(argc, argv); - if (target == ALPHA::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == LOONGARCH32::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); if (target == LOONGARCH64::target_name) - return elf_main(argc, argv); + return mold_main(argc, argv); unreachable(); } @@ -155,6 +154,8 @@ void create_synthetic_sections(Context &ctx) { ctx.verdef = push(new VerdefSection); if (ctx.arg.emit_relocs) ctx.eh_frame_reloc = push(new EhFrameRelocSection); + if (!ctx.arg.separate_debug_file.empty()) + 
ctx.gnu_debuglink = push(new GnuDebuglinkSection); if (ctx.arg.shared || !ctx.dsos.empty() || ctx.arg.pie) { ctx.dynamic = push(new DynamicSection(ctx)); @@ -170,6 +171,13 @@ void create_synthetic_sections(Context &ctx) { ctx.note_package = push(new NotePackageSection); ctx.note_property = push(new NotePropertySection); + if (!ctx.arg.oformat_binary) { + ElfShdr shdr = {}; + shdr.sh_type = SHT_PROGBITS; + shdr.sh_flags = SHF_MERGE | SHF_STRINGS; + ctx.comment = MergedSection::get_instance(ctx, ".comment", shdr); + } + if constexpr (is_riscv) ctx.extra.riscv_attributes = push(new RiscvAttributesSection); @@ -178,15 +186,6 @@ void create_synthetic_sections(Context &ctx) { if constexpr (is_ppc64v2) ctx.extra.save_restore = push(new PPC64SaveRestoreSection); - - if constexpr (is_sparc) { - if (ctx.arg.is_static) - ctx.extra.tls_get_addr_sec = push(new SparcTlsGetAddrSection); - ctx.extra.tls_get_addr_sym = get_symbol(ctx, "__tls_get_addr"); - } - - if constexpr (is_alpha) - ctx.extra.got = push(new AlphaGotSection); } template @@ -252,173 +251,146 @@ static void clear_symbols(Context &ctx) { } template -void do_resolve_symbols(Context &ctx) { +void resolve_symbols(Context &ctx) { + Timer t(ctx, "resolve_symbols"); + std::vector *> files; append(files, ctx.objs); append(files, ctx.dsos); - // Due to legacy reasons, archive members will only get included in the final - // binary if they satisfy one of the undefined symbols in a non-archive object - // file. This is called archive extraction. In finalize_archive_extraction, - // this is processed as follows: - // - // 1. Do preliminary symbol resolution assuming all archive members - // are included. This matches the undefined symbols with ones to be - // extracted from archives. - // - // 2. Do a mark & sweep pass to eliminate unneeded archive members. - // - // Note that the symbol resolution inside finalize_archive_extraction uses a - // different rule. 
In order to prevent extracting archive members that can be - // satisfied by either non-archive object files or DSOs, the archive members - // are given a lower priority. This is not correct for the general case, where - // *extracted* object files have precedence over DSOs and even non-archive - // files that are passed earlier in the command line. Hence, the symbol - // resolution is thrown away once we determine which archive members to - // extract, and redone later with the formal rule. - { - Timer t(ctx, "extract_archive_members"); - - // Register symbols + for (;;) { + // Call resolve_symbols() to find the most appropriate file for each + // symbol. And then mark reachable objects to decide which files to + // include into an output. tbb::parallel_for_each(files, [&](InputFile *file) { file->resolve_symbols(ctx); }); - // Mark reachable objects to decide which files to include into an output. - // This also merges symbol visibility. mark_live_objects(ctx); - // Cleanup. The rule used for archive extraction isn't accurate for the - // general case of symbol extraction, so reset the resolution to be redone - // later. + // Now that we know the exact set of input files that are to be + // included in the output file, we want to redo symbol resolution. + // This is because symbols defined by object files in archive files + // may have risen as a result of mark_live_objects(). + // + // To redo symbol resolution, we want to clear the state first. clear_symbols(ctx); - // Now that the symbol references are gone, remove the eliminated files from - // the file list. - std::erase_if(files, [](InputFile *file) { return !file->is_alive; }); - std::erase_if(ctx.objs, [](InputFile *file) { return !file->is_alive; }); - std::erase_if(ctx.dsos, [](InputFile *file) { return !file->is_alive; }); - } - - // COMDAT elimination needs to happen exactly here. 
- // - // It needs to be after archive extraction, otherwise we might assign COMDAT - // leader to an archive member that is not supposed to be extracted. - // - // It needs to happen before symbol resolution, otherwise we could eliminate - // a symbol that is already resolved to and cause dangling references. - { - Timer t(ctx, "eliminate_comdats"); - + // COMDAT elimination needs to happen exactly here. + // + // It needs to be after archive extraction, otherwise we might + // assign COMDAT leader to an archive member that is not supposed to + // be extracted. + // + // It needs to happen before the final symbol resolution, otherwise + // we could eliminate a symbol that is already resolved to and cause + // dangling references. tbb::parallel_for_each(ctx.objs, [](ObjectFile *file) { - for (ComdatGroupRef &ref : file->comdat_groups) - update_minimum(ref.group->owner, file->priority); + if (file->is_alive) + for (ComdatGroupRef &ref : file->comdat_groups) + update_minimum(ref.group->owner, file->priority); }); tbb::parallel_for_each(ctx.objs, [](ObjectFile *file) { - for (ComdatGroupRef &ref : file->comdat_groups) - if (ref.group->owner != file->priority) - for (u32 i : ref.members) - if (file->sections[i]) - file->sections[i]->kill(); + if (file->is_alive) + for (ComdatGroupRef &ref : file->comdat_groups) + if (ref.group->owner != file->priority) + for (u32 i : ref.members) + if (InputSection *isec = file->sections[i].get()) + isec->is_alive = false; }); - } - // Since we have turned on object files live bits, their symbols - // may now have higher priority than before. So run the symbol - // resolution pass again to get the final resolution result. 
- tbb::parallel_for_each(files, [&](InputFile *file) { - file->resolve_symbols(ctx); - }); -} - -template -void resolve_symbols(Context &ctx) { - Timer t(ctx, "resolve_symbols"); + // Redo symbol resolution + tbb::parallel_for_each(files, [&](InputFile *file) { + if (file->is_alive) + file->resolve_symbols(ctx); + }); - std::vector *> objs = ctx.objs; - std::vector *> dsos = ctx.dsos; + // Symbols with hidden visibility need to be resolved within the + // output file. If a hidden symbol was resolved to a DSO, we'll redo + // symbol resolution from scratch with the flag to skip that symbol + // next time. This should be rare. + std::atomic_bool flag = false; - do_resolve_symbols(ctx); + tbb::parallel_for_each(ctx.dsos, [&](SharedFile *file) { + if (file->is_alive) { + for (Symbol *sym : file->symbols) { + if (sym->file == file && sym->visibility == STV_HIDDEN) { + sym->skip_dso = true; + flag = true; + } + } + } + }); - bool has_lto_obj = false; - for (ObjectFile *file : objs) - if (file->is_alive && (file->is_lto_obj || file->is_gcc_offload_obj)) - has_lto_obj = true; + if (!flag) + return; - if (has_lto_obj) { - // Do link-time optimization. We pass all IR object files to the - // compiler backend to compile them into a few ELF object files. - // - // The compiler backend needs to know how symbols are resolved, - // so compute symbol visibility, import/export bits, etc early. - mark_live_objects(ctx); - apply_version_script(ctx); - parse_symbol_version(ctx); - compute_import_export(ctx); + clear_symbols(ctx); + resolve_symbols(ctx); + } +} - // Do LTO. It compiles IR object files into a few big ELF files. - std::vector *> lto_objs = do_lto(ctx); +// Do link-time optimization. We pass all IR object files to the compiler +// backend to compile them into a few ELF object files. +template +void do_lto(Context &ctx) { + Timer t(ctx, "do_lto"); - // do_resolve_symbols() have removed unreferenced files. 
Restore the - // original files here because some of them may have to be resurrected - // because they are referenced by the ELF files returned from do_lto(). - ctx.objs = objs; - ctx.dsos = dsos; + // The compiler backend needs to know how symbols are resolved, so + // compute symbol visibility, import/export bits, etc early. + mark_live_objects(ctx); + apply_version_script(ctx); + parse_symbol_version(ctx); + compute_import_export(ctx); - append(ctx.objs, lto_objs); + // Invoke the LTO plugin. This step compiles IR object files into a few + // big ELF files. + std::vector *> lto_objs = run_lto_plugin(ctx); + append(ctx.objs, lto_objs); - // Redo name resolution from scratch. - clear_symbols(ctx); + // Redo name resolution. + clear_symbols(ctx); - // Remove IR object files. - for (ObjectFile *file : ctx.objs) - if (file->is_lto_obj) - file->is_alive = false; + // Remove IR object files. + for (ObjectFile *file : ctx.objs) + if (file->is_lto_obj) + file->is_alive = false; - std::erase_if(ctx.objs, [](ObjectFile *file) { return file->is_lto_obj; }); + std::erase_if(ctx.objs, [](ObjectFile *file) { return file->is_lto_obj; }); - do_resolve_symbols(ctx); - } + resolve_symbols(ctx); } -// .eh_frame sections are parsed and regenerated by the linker for the purpose -// of deduplication and garbage collection. As such, the input sections should -// not be copied over. -// -// However, in very rare cases (e.g. GCC CRT compiled with LTO) we might need -// to resolve cross-object .eh_frame section references (they only point to -// begin or end and don't depend on the actual section contents). -// Therefore, the sections are "killed" after symbol resolution as a separate -// pass. 
template -void kill_eh_frame_sections(Context &ctx) { - Timer t(ctx, "kill_eh_frame_sections"); +void parse_eh_frame_sections(Context &ctx) { + Timer t(ctx, "parse_eh_frame_sections"); - for (ObjectFile *file : ctx.objs) - for (InputSection *sec : file->eh_frame_sections) - sec->is_alive = false; + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + file->parse_ehframe(ctx); + + for (InputSection *isec : file->eh_frame_sections) + isec->is_alive = false; + }); } template -void split_section_pieces(Context &ctx) { - Timer t(ctx, "split_section_pieces"); +void create_merged_sections(Context &ctx) { + Timer t(ctx, "create_merged_sections"); + // Convert InputSections to MergeableSections. tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - file->initialize_mergeable_sections(ctx); + file->convert_mergeable_sections(ctx); }); -} - -template -void resolve_section_pieces(Context &ctx) { - Timer t(ctx, "resolve_section_pieces"); - // We aim 2/3 occupation ratio - for (std::unique_ptr> &sec : ctx.merged_sections) - sec->map.resize(sec->estimator.get_cardinality() * 3 / 2); + tbb::parallel_for_each(ctx.merged_sections, + [&](std::unique_ptr> &sec) { + if (sec->shdr.sh_flags & SHF_ALLOC) + sec->resolve(ctx); + }); tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - file->resolve_section_pieces(ctx); + file->reattach_section_pieces(ctx); }); } @@ -431,61 +403,6 @@ void convert_common_symbols(Context &ctx) { }); } -template -static std::string get_cmdline_args(Context &ctx) { - std::stringstream ss; - ss << ctx.cmdline_args[1]; - for (i64 i = 2; i < ctx.cmdline_args.size(); i++) - ss << " " << ctx.cmdline_args[i]; - return ss.str(); -} - -template -void add_comment_string(Context &ctx, std::string str) { - MergedSection *sec = - MergedSection::get_instance(ctx, ".comment", SHT_PROGBITS, - SHF_MERGE | SHF_STRINGS, 1, 1); - - if (sec->map.nbuckets == 0) - sec->map.resize(4096); - - std::string_view buf = save_string(ctx, str); - std::string_view 
data(buf.data(), buf.size() + 1); - sec->insert(ctx, data, hash_string(data), 0); -} - -template -void compute_merged_section_sizes(Context &ctx) { - Timer t(ctx, "compute_merged_section_sizes"); - - // Add an identification string to .comment. - if (!ctx.arg.oformat_binary) - add_comment_string(ctx, get_mold_version()); - - // Embed command line arguments for debugging. - if (char *env = getenv("MOLD_DEBUG"); env && env[0]) - add_comment_string(ctx, "mold command line: " + get_cmdline_args(ctx)); - - tbb::parallel_for_each(ctx.merged_sections, - [&](std::unique_ptr> &sec) { - sec->assign_offsets(ctx); - }); -} - -template -static std::vector> split(std::vector &input, i64 unit) { - std::span span(input); - std::vector> vec; - - while (span.size() >= unit) { - vec.push_back(span.subspan(0, unit)); - span = span.subspan(unit); - } - if (!span.empty()) - vec.push_back(span); - return vec; -} - template static bool has_ctors_and_init_array(Context &ctx) { bool x = false; @@ -550,13 +467,6 @@ get_output_name(Context &ctx, std::string_view name, u64 flags) { return ".ARM.extab"; } - if constexpr (is_alpha) { - if (name.starts_with(".sdata.")) - return ".sdata"; - if (name.starts_with(".sbss.")) - return ".sbss"; - } - if (ctx.arg.z_keep_text_section_prefix) { static std::string_view prefixes[] = { ".text.hot.", ".text.unknown.", ".text.unlikely.", ".text.startup.", @@ -574,6 +484,7 @@ get_output_name(Context &ctx, std::string_view name, u64 flags) { ".text.", ".data.rel.ro.", ".data.", ".rodata.", ".bss.rel.ro.", ".bss.", ".init_array.", ".fini_array.", ".tbss.", ".tdata.", ".gcc_except_table.", ".ctors.", ".dtors.", ".gnu.warning.", ".openbsd.randomdata.", + ".sdata.", ".sbss.", ".srodata", }; for (std::string_view prefix : prefixes) { @@ -725,8 +636,7 @@ void create_output_sections(Context &ctx) { // Add output sections and mergeable sections to ctx.chunks for (std::unique_ptr> &osec : ctx.merged_sections) - if (osec->shdr.sh_size) - chunks.push_back(osec.get()); + 
chunks.push_back(osec.get()); // Sections are added to the section lists in an arbitrary order // because they are created in parallel. Sort them to to make the @@ -1027,7 +937,7 @@ R"(# This is an output of the mold linker's --print-dependencies option. std::unordered_set visited; for (const ElfRel &r : isec->get_rels(ctx)) { - if (r.r_type == R_NONE) + if (r.r_type == R_NONE || file->elf_syms.size() <= r.r_sym) continue; ElfSym &esym = file->elf_syms[r.r_sym]; @@ -1129,6 +1039,50 @@ void check_duplicate_symbols(Context &ctx) { ctx.checkpoint(); } +// If --no-allow-shlib-undefined is specified, we report errors on +// unresolved symbols in shared libraries. This is useful when you are +// creating a final executable and want to make sure that all symbols +// including ones in shared libraries have been resolved. +// +// If you do not pass --no-allow-shlib-undefined, undefined symbols in +// shared libraries will be reported as run-time error by the dynamic +// linker. +template +void check_shlib_undefined(Context &ctx) { + Timer t(ctx, "check_shlib_undefined"); + + auto is_sparc_register = [](const ElfSym &esym) { + // Dynamic symbol table for SPARC contains bogus entries which + // we need to ignore + if constexpr (is_sparc) + return esym.st_type == STT_SPARC_REGISTER; + return false; + }; + + // Obtain a list of known shared library names. + std::unordered_set sonames; + for (SharedFile *file : ctx.dsos) + sonames.insert(file->soname); + + tbb::parallel_for_each(ctx.dsos, [&](SharedFile *file) { + // Skip the file if it depends on a file that we know nothing about. + // This is because missing symbols may be provided by that unknown file. + for (std::string_view needed : file->get_dt_needed(ctx)) + if (sonames.count(needed) == 0) + return; + + // Check if all undefined symbols have been resolved. 
+ for (i64 i = 0; i < file->elf_syms.size(); i++) { + const ElfSym &esym = file->elf_syms[i]; + Symbol &sym = *file->symbols[i]; + if (esym.is_undef() && !esym.is_weak() && !sym.file && + !is_sparc_register(esym)) + Error(ctx) << *file << ": --no-allow-shlib-undefined: undefined symbol: " + << sym; + } + }); +} + template void check_symbol_types(Context &ctx) { Timer t(ctx, "check_symbol_types"); @@ -1138,14 +1092,11 @@ void check_symbol_types(Context &ctx) { append(files, ctx.dsos); auto canonicalize = [](u32 ty) -> u32 { - switch (ty) { - case STT_GNU_IFUNC: + if (ty == STT_GNU_IFUNC) return STT_FUNC; - case STT_COMMON: + if (ty == STT_COMMON) return STT_OBJECT; - default: - return ty; - } + return ty; }; tbb::parallel_for_each(files.begin(), files.end(), [&](InputFile *file) { @@ -1205,6 +1156,11 @@ template void sort_init_fini(Context &ctx) { Timer t(ctx, "sort_init_fini"); + struct Entry { + InputSection *sect; + i64 prio; + }; + for (Chunk *chunk : ctx.chunks) { if (OutputSection *osec = chunk->to_osec()) { if (osec->name == ".init_array" || osec->name == ".preinit_array" || @@ -1212,19 +1168,20 @@ void sort_init_fini(Context &ctx) { if (ctx.arg.shuffle_sections == SHUFFLE_SECTIONS_REVERSE) std::reverse(osec->members.begin(), osec->members.end()); - std::unordered_map *, i64> map; + std::vector vec; for (InputSection *isec : osec->members) { std::string_view name = isec->name(); if (name.starts_with(".ctors") || name.starts_with(".dtors")) - map.insert({isec, 65535 - get_ctor_dtor_priority(isec)}); + vec.push_back({isec, 65535 - get_ctor_dtor_priority(isec)}); else - map.insert({isec, get_init_fini_priority(isec)}); + vec.push_back({isec, get_init_fini_priority(isec)}); } - sort(osec->members, [&](InputSection *a, InputSection *b) { - return map[a] < map[b]; - }); + sort(vec, [&](const Entry &a, const Entry &b) { return a.prio < b.prio; }); + + for (i64 i = 0; i < vec.size(); i++) + osec->members[i] = vec[i].sect; } } } @@ -1234,19 +1191,25 @@ template void 
sort_ctor_dtor(Context &ctx) { Timer t(ctx, "sort_ctor_dtor"); + struct Entry { + InputSection *sect; + i64 prio; + }; + for (Chunk *chunk : ctx.chunks) { if (OutputSection *osec = chunk->to_osec()) { if (osec->name == ".ctors" || osec->name == ".dtors") { if (ctx.arg.shuffle_sections != SHUFFLE_SECTIONS_REVERSE) std::reverse(osec->members.begin(), osec->members.end()); - std::unordered_map *, i64> map; + std::vector vec; for (InputSection *isec : osec->members) - map.insert({isec, get_ctor_dtor_priority(isec)}); + vec.push_back({isec, get_ctor_dtor_priority(isec)}); - sort(osec->members, [&](InputSection *a, InputSection *b) { - return map[a] < map[b]; - }); + sort(vec, [&](const Entry &a, const Entry &b) { return a.prio < b.prio; }); + + for (i64 i = 0; i < vec.size(); i++) + osec->members[i] = vec[i].sect; } } } @@ -1286,10 +1249,13 @@ void fixup_ctors_in_init_array(Context &ctx) { } }; - if (OutputSection *osec = find_section(ctx, ".init_array")) - fixup(*osec); - if (OutputSection *osec = find_section(ctx, ".fini_array")) - fixup(*osec); + if (Chunk *chunk = find_chunk(ctx, ".init_array")) + if (OutputSection *osec = chunk->to_osec()) + fixup(*osec); + + if (Chunk *chunk = find_chunk(ctx, ".fini_array")) + if (OutputSection *osec = chunk->to_osec()) + fixup(*osec); } template @@ -1358,76 +1324,22 @@ template void compute_section_sizes(Context &ctx) { Timer t(ctx, "compute_section_sizes"); - struct Group { - i64 size = 0; - i64 p2align = 0; - i64 offset = 0; - std::span *> members; - }; - - tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { - OutputSection *osec = chunk->to_osec(); - if (!osec) - return; - - // This pattern will be processed in the next loop. - if constexpr (needs_thunk) - if ((osec->shdr.sh_flags & SHF_EXECINSTR) && !ctx.arg.relocatable) - return; - - // Since one output section may contain millions of input sections, - // we first split input sections into groups and assign offsets to - // groups. 
- std::vector groups; - constexpr i64 group_size = 10000; - - for (std::span *> span : split(osec->members, group_size)) - groups.push_back(Group{.members = span}); + if constexpr (needs_thunk) { + // We cannot use parallel-for for compute_section_size() which may + // call create_range_extension_thunks() because that function is + // not thread-safe. + for (Chunk *chunk : ctx.chunks) + if (chunk->shdr.sh_flags & SHF_EXECINSTR) + chunk->compute_section_size(ctx); - tbb::parallel_for_each(groups, [](Group &group) { - for (InputSection *isec : group.members) { - group.size = align_to(group.size, 1 << isec->p2align) + isec->sh_size; - group.p2align = std::max(group.p2align, isec->p2align); - } + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (!(chunk->shdr.sh_flags & SHF_EXECINSTR)) + chunk->compute_section_size(ctx); }); - - ElfShdr &shdr = osec->shdr; - shdr.sh_size = 0; - - for (i64 i = 0; i < groups.size(); i++) { - shdr.sh_size = align_to(shdr.sh_size, 1 << groups[i].p2align); - groups[i].offset = shdr.sh_size; - shdr.sh_size += groups[i].size; - shdr.sh_addralign = std::max(shdr.sh_addralign, 1 << groups[i].p2align); - } - - // Assign offsets to input sections. - tbb::parallel_for_each(groups, [](Group &group) { - i64 offset = group.offset; - for (InputSection *isec : group.members) { - offset = align_to(offset, 1 << isec->p2align); - isec->offset = offset; - offset += isec->sh_size; - } + } else { + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + chunk->compute_section_size(ctx); }); - }); - - // On ARM32 or ARM64, we may need to create so-called "range extension - // thunks" to extend branch instructions reach, as they can jump only - // to ±16 MiB or ±128 MiB, respecitvely. - // - // In the following loop, We compute the sizes of sections while - // inserting thunks. This pass cannot be parallelized. That is, - // create_range_extension_thunks is parallelized internally, but the - // function itself is not thread-safe. 
- if constexpr (needs_thunk) { - Timer t2(ctx, "create_range_extension_thunks"); - - if (!ctx.arg.relocatable) - for (Chunk *chunk : ctx.chunks) - if (OutputSection *osec = chunk->to_osec()) - if (osec->shdr.sh_flags & SHF_EXECINSTR) - osec->create_range_extension_thunks(ctx); } } @@ -1535,6 +1447,14 @@ void scan_relocations(Context &ctx) { file->scan_relocations(ctx); }); + // Word-size absolute relocations (e.g. R_X86_64_64) are handled + // separately because they can be promoted to dynamic relocations. + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (OutputSection *osec = chunk->to_osec()) + if (osec->shdr.sh_flags & SHF_ALLOC) + osec->scan_abs_relocations(ctx); + }); + // Exit if there was a relocation that refers an undefined symbol. ctx.checkpoint(); @@ -1608,9 +1528,6 @@ void scan_relocations(Context &ctx) { sym->flags = 0; } - if constexpr (is_alpha) - ctx.extra.got->finalize(); - if (ctx.has_textrel && ctx.arg.warn_textrel) Warn(ctx) << "creating a DT_TEXTREL in an output file"; } @@ -1702,13 +1619,21 @@ void copy_chunks(Context &ctx) { // For --relocatable and --emit-relocs, we want to copy non-relocation // sections first. This is because REL-type relocation sections (as // opposed to RELA-type) stores relocation addends to target sections. + // + // We also do that for SH4 because despite being RELA, we always need + // to write addends to relocated places for SH4. + auto is_rel = [](Chunk &chunk) { + return chunk.shdr.sh_type == SHT_REL || + (is_sh4 && chunk.shdr.sh_type == SHT_RELA); + }; + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { - if (chunk->shdr.sh_type != SHT_REL) + if (!is_rel(*chunk)) copy(*chunk); }); tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { - if (chunk->shdr.sh_type == SHT_REL) + if (is_rel(*chunk)) copy(*chunk); }); @@ -1718,62 +1643,21 @@ void copy_chunks(Context &ctx) { // undefined errors.
report_undef_errors(ctx); - if constexpr (is_arm32) - fixup_arm_exidx_section(ctx); -} + // Zero-clear paddings between chunks + auto zero = [&](Chunk *chunk, i64 next_start) { + i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size; + memset(ctx.buf + pos, 0, next_start - pos); + }; -// Rewrite the leading endbr64 instruction with a nop if a function -// symbol's address was not taken. -template -void rewrite_endbr(Context &ctx) { - Timer t(ctx, "rewrite_endbr"); - assert(is_x86_64); + std::vector *> chunks = ctx.chunks; - // Compute address-taken bit for each symbol - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (std::unique_ptr> &isec : file->sections) { - if (isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC)) { - for (const ElfRel &rel : isec->get_rels(ctx)) { - Symbol &sym = *file->symbols[rel.r_sym]; - if (!is_func_call_rel(rel) && sym.esym().st_type == STT_FUNC) { - std::scoped_lock lock(sym.mu); - sym.address_taken = true; - } - } - } - } + std::erase_if(chunks, [](Chunk *chunk) { + return chunk->shdr.sh_type == SHT_NOBITS; }); - // Exported symbols are conservatively assumed to be address-taken. 
- if (ctx.dynsym) - for (Symbol *sym : ctx.dynsym->symbols) - if (sym && sym->is_exported) - sym->address_taken = true; - - // Some symbols are implicitly address-taken - ctx.arg.entry->address_taken = true; - ctx.arg.init->address_taken = true; - ctx.arg.fini->address_taken = true; - - // Rewrite endbr64 with nop - u8 endbr64[] = {0xf3, 0x0f, 0x1e, 0xfa}; - u8 nop[] = {0x0f, 0x1f, 0x40, 0x00}; - - tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { - for (Symbol *sym : file->symbols) { - if (sym->file == file && sym->esym().st_type == STT_FUNC && - !sym->address_taken) { - if (InputSection *isec = sym->get_input_section()) { - if (OutputSection *osec = isec->output_section) { - u8 *buf = ctx.buf + osec->shdr.sh_offset + isec->offset + - sym->value; - if (memcmp(buf, endbr64, 4) == 0) - memcpy(buf, nop, 4); - } - } - } - } - }); + for (i64 i = 1; i < chunks.size(); i++) + zero(chunks[i - 1], chunks[i]->shdr.sh_offset); + zero(chunks.back(), ctx.output_file->filesize); } template @@ -1785,16 +1669,77 @@ void construct_relr(Context &ctx) { }); } +// The hash function for .gnu.hash. +static u32 djb_hash(std::string_view name) { + u32 h = 5381; + for (u8 c : name) + h = (h << 5) + h + c; + return h; +} + template -void create_output_symtab(Context &ctx) { - Timer t(ctx, "compute_symtab_size"); +void sort_dynsyms(Context &ctx) { + Timer t(ctx, "sort_dynsyms"); - if (!ctx.arg.strip_all && !ctx.arg.retain_symbols_file) { - tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { - chunk->compute_symtab_size(ctx); + std::span *> syms = ctx.dynsym->symbols; + if (syms.empty()) + return; + + // In any symtab, local symbols must precede global symbols. + auto first_global = std::stable_partition(syms.begin() + 1, syms.end(), + [&](Symbol *sym) { + return sym->is_local(ctx); + }); + + // .gnu.hash imposes more restrictions on the order of the symbols in + // .dynsym. 
+ if (ctx.gnu_hash) { + auto first_exported = std::stable_partition(first_global, syms.end(), + [&](Symbol *sym) { + return !sym->is_exported; + }); + + // Count the number of exported symbols to compute the size of .gnu.hash. + i64 num_exported = syms.end() - first_exported; + u32 num_buckets = num_exported / ctx.gnu_hash->LOAD_FACTOR + 1; + + tbb::parallel_for_each(first_exported, syms.end(), [&](Symbol *sym) { + sym->set_djb_hash(ctx, djb_hash(sym->name())); + }); + + tbb::parallel_sort(first_exported, syms.end(), + [&](Symbol *a, Symbol *b) { + return std::tuple(a->get_djb_hash(ctx) % num_buckets, a->name()) < + std::tuple(b->get_djb_hash(ctx) % num_buckets, b->name()); }); + + ctx.gnu_hash->num_buckets = num_buckets; + ctx.gnu_hash->num_exported = num_exported; } + // Compute .dynstr size + ctx.dynstr->dynsym_offset = ctx.dynstr->shdr.sh_size; + + tbb::enumerable_thread_specific size; + tbb::parallel_for((i64)1, (i64)syms.size(), [&](i64 i) { + syms[i]->set_dynsym_idx(ctx, i); + size.local() += syms[i]->name().size() + 1; + }); + + ctx.dynstr->shdr.sh_size += size.combine(std::plus()); + + // ELF's symbol table sh_info holds the offset of the first global symbol. 
+ ctx.dynsym->shdr.sh_info = first_global - syms.begin(); +} + +template +void create_output_symtab(Context &ctx) { + Timer t(ctx, "compute_symtab_size"); + + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + chunk->compute_symtab_size(ctx); + }); + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { file->compute_symtab_size(ctx); }); @@ -1951,6 +1896,9 @@ static bool should_export(Context &ctx, Symbol &sym) { switch (sym.ver_idx) { case VER_NDX_UNSPECIFIED: + if (ctx.arg.dynamic_list_data) + if (u32 ty = sym.get_type(); ty != STT_FUNC && ty != STT_GNU_IFUNC) + return true; if (ctx.arg.shared) return !((ObjectFile *)sym.file)->exclude_libs; return ctx.arg.export_dynamic; @@ -2168,26 +2116,6 @@ void compute_address_significance(Context &ctx) { }); } -template -void clear_padding(Context &ctx) { - Timer t(ctx, "clear_padding"); - - auto zero = [&](Chunk *chunk, i64 next_start) { - i64 pos = chunk->shdr.sh_offset + chunk->shdr.sh_size; - memset(ctx.buf + pos, 0, next_start - pos); - }; - - std::vector *> chunks = ctx.chunks; - - std::erase_if(chunks, [](Chunk *chunk) { - return chunk->shdr.sh_type == SHT_NOBITS; - }); - - for (i64 i = 1; i < chunks.size(); i++) - zero(chunks[i - 1], chunks[i]->shdr.sh_offset); - zero(chunks.back(), ctx.output_file->filesize); -} - // We want to sort output chunks in the following order. // // @@ -2209,7 +2137,6 @@ void clear_padding(Context &ctx) { // // .got // .toc -// .alpha_got // // .relro_padding // @@ -2302,8 +2229,6 @@ void sort_output_sections_regular(Context &ctx) { return 2; if (chunk->name == ".toc") return 3; - if (chunk->name == ".alpha_got") - return 4; if (chunk == ctx.relro_padding) return INT64_MAX; return 0; @@ -2380,11 +2305,6 @@ void sort_output_sections(Context &ctx) { sort_output_sections_by_order(ctx); } -template -static bool is_tbss(Chunk *chunk) { - return (chunk->shdr.sh_type == SHT_NOBITS) && (chunk->shdr.sh_flags & SHF_TLS); -} - // This function assigns virtual addresses to output sections. 
Assigning // addresses is a bit tricky because we want to pack sections as tightly // as possible while not violating the constraints imposed by the hardware @@ -2450,6 +2370,10 @@ static void set_virtual_addresses_regular(Context &ctx) { return chunk == first_tls_chunk ? tls_alignment : (u64)chunk->shdr.sh_addralign; }; + auto is_tbss = [](Chunk *chunk) { + return (chunk->shdr.sh_type == SHT_NOBITS) && (chunk->shdr.sh_flags & SHF_TLS); + }; + for (i64 i = 0; i < chunks.size(); i++) { if (!(chunks[i]->shdr.sh_flags & SHF_ALLOC)) continue; @@ -2679,6 +2603,24 @@ static i64 set_file_offsets(Context &ctx) { return fileoff; } +// Remove debug sections from ctx.chunks and save them to ctx.debug_chunks. +// This is for --separate-debug-file. +template +void separate_debug_sections(Context &ctx) { + auto is_debug_section = [&](Chunk *chunk) { + if (chunk->shdr.sh_flags & SHF_ALLOC) + return false; + return chunk == ctx.gdb_index || chunk == ctx.symtab || chunk == ctx.strtab || + chunk->name.starts_with(".debug_"); + }; + + auto mid = std::stable_partition(ctx.chunks.begin(), ctx.chunks.end(), + is_debug_section); + + ctx.debug_chunks = {ctx.chunks.begin(), mid}; + ctx.chunks.erase(ctx.chunks.begin(), mid); +} + template void compute_section_headers(Context &ctx) { // Update sh_size for each chunk. @@ -2816,7 +2758,7 @@ void fix_synthetic_symbols(Context &ctx) { // If we set values to these symbols in a static PIE, glibc attempts // to run ifunc initializers twice, with the second attempt with wrong // function addresses, causing a segmentation fault. 
- if (ctx.reldyn && ctx.arg.is_static && !ctx.arg.pie) { + if (ctx.reldyn && ctx.arg.static_ && !ctx.arg.pie) { stop(ctx.__rel_iplt_start, ctx.reldyn); stop(ctx.__rel_iplt_end, ctx.reldyn); @@ -2973,7 +2915,7 @@ void fix_synthetic_symbols(Context &ctx) { } template -i64 compress_debug_sections(Context &ctx) { +void compress_debug_sections(Context &ctx) { Timer t(ctx, "compress_debug_sections"); tbb::parallel_for((i64)0, (i64)ctx.chunks.size(), [&](i64 i) { @@ -2995,8 +2937,179 @@ i64 compress_debug_sections(Context &ctx) { ctx.ehdr->update_shdr(ctx); if (ctx.shdr) ctx.shdr->update_shdr(ctx); +} + +// BLAKE3 is a cryptographic hash function just like SHA256. +// We use it instead of SHA256 because it's faster. +static void blake3_hash(u8 *buf, i64 size, u8 *out) { + blake3_hasher hasher; + blake3_hasher_init(&hasher); + blake3_hasher_update(&hasher, buf, size); + blake3_hasher_finalize(&hasher, out, BLAKE3_OUT_LEN); +} + +template +std::vector> get_shards(Context &ctx) { + constexpr i64 shard_size = 4 * 1024 * 1024; // 4 MiB + std::span buf = {ctx.buf, (size_t)ctx.output_file->filesize}; + std::vector> vec; + + while (!buf.empty()) { + i64 sz = std::min(shard_size, buf.size()); + vec.push_back(buf.subspan(0, sz)); + buf = buf.subspan(sz); + } + return vec; +} + +template +void write_build_id(Context &ctx) { + Timer t(ctx, "write_build_id"); + + switch (ctx.arg.build_id.kind) { + case BuildId::HEX: + ctx.buildid->contents = ctx.arg.build_id.value; + break; + case BuildId::HASH: { + std::vector> shards = get_shards(ctx); + std::vector hashes(shards.size() * BLAKE3_OUT_LEN); + + tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) { + blake3_hash(shards[i].data(), shards[i].size(), + hashes.data() + i * BLAKE3_OUT_LEN); + +#ifdef HAVE_MADVISE + // Make the kernel page out the file contents we've just written + // so that subsequent close(2) call will become quicker.
+ if (i > 0 && ctx.output_file->is_mmapped) + madvise(shards[i].data(), shards[i].size(), MADV_DONTNEED); +#endif + }); + + u8 buf[BLAKE3_OUT_LEN]; + blake3_hash(hashes.data(), hashes.size(), buf); + + assert(ctx.arg.build_id.size() <= BLAKE3_OUT_LEN); + ctx.buildid->contents = {buf, buf + ctx.arg.build_id.size()}; + break; + } + case BuildId::UUID: { + u8 buf[16]; + get_random_bytes(buf, 16); + + // Indicate that this is UUIDv4 as defined by RFC4122 + buf[6] = (buf[6] & 0b0000'1111) | 0b0100'0000; + buf[8] = (buf[8] & 0b0011'1111) | 0b1000'0000; + ctx.buildid->contents = {buf, buf + 16}; + break; + } + default: + unreachable(); + } + + ctx.buildid->copy_buf(ctx); +} + +// A .gnu_debuglink section contains a filename and a CRC32 checksum of a +// debug info file. When we are writing a .gnu_debuglink, we don't know +// its CRC32 checksum because we haven't created a debug info file. So we +// write a dummy value instead. +// +// We can't choose a random value as a dummy value for build +// reproducibility. We also don't want to write a fixed value for all +// files because the CRC checksum is in this section to prevent using +// wrong file on debugging. gdb rejects a debug info file if its CRC +// doesn't match with the one in .gnu_debuglink. +// +// Therefore, we'll try to make our CRC checksum as unique as possible. +// We'll remember that checksum, and after creating a debug info file, add +// a few bytes of garbage at the end of it so that the debug info file's +// CRC checksum becomes the one that we have precomputed.
+template +void write_gnu_debuglink(Context &ctx) { + Timer t(ctx, "write_gnu_debuglink"); + u32 crc32; + + if (ctx.buildid) { + crc32 = compute_crc32(0, ctx.buildid->contents.data(), + ctx.buildid->contents.size()); + } else { + std::vector> shards = get_shards(ctx); + std::vector> hashes(shards.size()); + + tbb::parallel_for((i64)0, (i64)shards.size(), [&](i64 i) { + hashes[i] = hash_string({(char *)shards[i].data(), shards[i].size()}); + }); + crc32 = compute_crc32(0, (u8 *)hashes.data(), hashes.size() * 8); + } + + ctx.gnu_debuglink->crc32 = crc32; + ctx.gnu_debuglink->copy_buf(ctx); +} + +// Write a separate debug file. This function is called after we finish +// writing to the usual output file. +template +void write_separate_debug_file(Context &ctx) { + Timer t(ctx, "write_separate_debug_file"); + + // Open an output file early + LockingOutputFile *file = + new LockingOutputFile(ctx, ctx.arg.separate_debug_file, 0666); + + // We want to write to the debug info file in background so that the + // user doesn't have to wait for it to complete. + if (ctx.arg.detach) + notify_parent(); + + // A debug info file contains all sections as the original file, though + // most of them can be empty as if they were bss sections. We convert + // real sections into dummy sections here. + for (i64 i = 0; i < ctx.chunks.size(); i++) { + Chunk *chunk = ctx.chunks[i]; + if (chunk != ctx.ehdr && chunk != ctx.shdr && chunk != ctx.shstrtab && + chunk->shdr.sh_type != SHT_NOTE) { + Chunk *sec = new OutputSection(chunk->name, SHT_NULL); + sec->shdr = chunk->shdr; + sec->shdr.sh_type = SHT_NOBITS; + + ctx.chunks[i] = sec; + ctx.chunk_pool.emplace_back(sec); + } + } + + // Restore debug info sections that had been set aside while we were + // creating the main file. + tbb::parallel_for_each(ctx.debug_chunks, [&](Chunk *chunk) { + chunk->compute_section_size(ctx); + }); + + append(ctx.chunks, ctx.debug_chunks); + + // Write to the debug info file as if it were a regular output file. 
+ compute_section_headers(ctx); + file->resize(ctx, set_osec_offsets(ctx)); + + ctx.output_file.reset(file); + ctx.buf = ctx.output_file->buf; + + copy_chunks(ctx); + + if (ctx.gdb_index) + write_gdb_index(ctx); + + // Reverse-compute a CRC32 value so that the CRC32 checksum embedded to + // the .gnu_debuglink section in the main executable matches with the + // debug info file's CRC32 checksum. + u32 crc = compute_crc32(0, ctx.buf, ctx.output_file->filesize); + + std::vector &buf2 = ctx.output_file->buf2; + if (!buf2.empty()) + crc = compute_crc32(crc, buf2.data(), buf2.size()); - return set_osec_offsets(ctx); + std::vector trailer = crc32_solve(crc, ctx.gnu_debuglink->crc32); + append(ctx.output_file->buf2, trailer); + ctx.output_file->close(ctx); } // Write Makefile-style dependency rules to a file specified by @@ -3102,11 +3215,10 @@ template void create_internal_file(Context &); template void apply_exclude_libs(Context &); template void create_synthetic_sections(Context &); template void resolve_symbols(Context &); -template void kill_eh_frame_sections(Context &); -template void split_section_pieces(Context &); -template void resolve_section_pieces(Context &); +template void do_lto(Context &); +template void parse_eh_frame_sections(Context &); +template void create_merged_sections(Context &); template void convert_common_symbols(Context &); -template void compute_merged_section_sizes(Context &); template void create_output_sections(Context &); template void add_synthetic_symbols(Context &); template void check_cet_errors(Context &); @@ -3114,6 +3226,7 @@ template void apply_section_align(Context &); template void print_dependencies(Context &); template void write_repro_file(Context &); template void check_duplicate_symbols(Context &); +template void check_shlib_undefined(Context &); template void check_symbol_types(Context &); template void sort_init_fini(Context &); template void sort_ctor_dtor(Context &); @@ -3127,19 +3240,22 @@ template void 
scan_relocations(Context &); template void report_undef_errors(Context &); template void create_reloc_sections(Context &); template void copy_chunks(Context &); -template void rewrite_endbr(Context &); template void construct_relr(Context &); +template void sort_dynsyms(Context &); template void create_output_symtab(Context &); template void apply_version_script(Context &); template void parse_symbol_version(Context &); template void compute_import_export(Context &); template void compute_address_significance(Context &); -template void clear_padding(Context &); +template void separate_debug_sections(Context &); template void compute_section_headers(Context &); template i64 set_osec_offsets(Context &); template void fix_synthetic_symbols(Context &); -template i64 compress_debug_sections(Context &); +template void compress_debug_sections(Context &); +template void write_build_id(Context &); +template void write_gnu_debuglink(Context &); +template void write_separate_debug_file(Context &); template void write_dependency_file(Context &); template void show_stats(Context &); -} // namespace mold::elf +} // namespace mold diff --git a/elf/relocatable.cc b/src/relocatable.cc similarity index 96% rename from elf/relocatable.cc rename to src/relocatable.cc index 01bf6d39..639dc6ae 100644 --- a/elf/relocatable.cc +++ b/src/relocatable.cc @@ -35,7 +35,7 @@ #include #include -namespace mold::elf { +namespace mold { // Create linker-synthesized sections template @@ -148,8 +148,6 @@ static u64 r_set_osec_offsets(Context &ctx) { template void combine_objects(Context &ctx) { - compute_merged_section_sizes(ctx); - create_output_sections(ctx); r_create_synthetic_sections(ctx); @@ -171,12 +169,10 @@ void combine_objects(Context &ctx) { compute_section_headers(ctx); i64 filesize = r_set_osec_offsets(ctx); - ctx.output_file = - OutputFile>::open(ctx, ctx.arg.output, filesize, 0666); + ctx.output_file = OutputFile::open(ctx, ctx.arg.output, filesize, 0666); ctx.buf = 
ctx.output_file->buf; copy_chunks(ctx); - clear_padding(ctx); ctx.output_file->close(ctx); ctx.checkpoint(); @@ -197,4 +193,4 @@ using E = MOLD_TARGET; template void combine_objects(Context &); -} // namespace mold::elf +} // namespace mold diff --git a/src/shrink-sections.cc b/src/shrink-sections.cc new file mode 100644 index 00000000..cfd3f4b3 --- /dev/null +++ b/src/shrink-sections.cc @@ -0,0 +1,151 @@ +// Since RISC instructions are generally up to 32 bits long, there's no +// way to embed very large immediates into their branch instructions. For +// example, RISC-V's JAL (jump and link) instruction can jump to only +// within PC ± 1 MiB because its immediate is 21 bits long. If the +// destination is further than that, we need to use two instructions +// instead; the first instruction being AUIPC, which sets the upper 20 +// bits of a displacement to a register, and the second being JALR, which +// specifies the lower 12 bits and the register. Combined, they specify a +// 32-bit displacement, which is sufficient to support the medium code +// model. +// +// However, always using two or more instructions for function calls is a +// waste of time and space if the branch target is within a single +// instruction's reach. There are two approaches to address this problem +// as follows: +// +// 1. The compiler optimistically emits a single branch instruction for +// all function calls. The linker then checks if the branch target is +// reachable, and if not, redirects the branch to a linker-synthesized +// code sequence that uses two or more instructions to branch further. +// That linker-synthesized code is called a "thunk". All RISC psABIs +// except RISC-V and LoongArch take this approach. +// +// 2. The compiler pessimistically emits two instructions to branch +// anywhere in PC ± 2 GiB, and the linker rewrites them with a single +// instruction if the branch target is close enough. RISC-V and +// LoongArch take this approach. 
+// +// This file contains functions to support (2). For (1), see thunks.cc. +// +// With the presence of this code-shrinking relaxation, sections can no +// longer be considered as atomic units. If we delete an instruction from +// the middle of a section, the section contents after that point need to +// be shifted by the size of the instruction. Symbol values and relocation +// offsets have to be shifted too if they refer to bytes past the deleted +// ones. +// +// In mold, we use `r_deltas` to memorize how many bytes have been shifted +// for relocations. For symbols, we directly mutate their `value` member. +// +// RISC-V and LoongArch object files tend to have way more relocations +// than those for other targets. This is because all branches, including +// those that jump within the same section, are explicitly expressed with +// relocations. Here is why we need them: all control-flow statements, +// such as `if` or `for`, are implemented using branch instructions. For +// other targets, the compiler doesn't emit relocations for such branches +// because it knows at compile-time exactly how many bytes have to be +// skipped. That's not true in RISC-V and LoongArch because the linker may +// delete bytes between a branch and its target. Therefore, all branches, +// including in-section ones, have to be explicitly expressed with +// relocations. +// +// Note that this mechanism only shrinks sections and never enlarges them, +// as the compiler always emits the longest instruction sequence. This +// makes the linker implementation a bit simpler because we don't need to +// worry about oscillation. 
+ +#if MOLD_RV64LE || MOLD_RV64BE || MOLD_RV32LE || MOLD_RV32BE || \ + MOLD_LOONGARCH64 || MOLD_LOONGARCH32 + +#include "mold.h" + +#include + +namespace mold { + +using E = MOLD_TARGET; + +static bool is_resizable(InputSection *isec) { + return isec && isec->is_alive && (isec->shdr().sh_flags & SHF_ALLOC) && + (isec->shdr().sh_flags & SHF_EXECINSTR); +} + +template <> +void shrink_sections(Context &ctx) { + Timer t(ctx, "shrink_sections"); + + // True if we can use the 2-byte instructions. This is usually true on + // Unix because RV64GC is generally considered the baseline hardware. + bool use_rvc = false; + if constexpr (is_riscv) + use_rvc = get_eflags(ctx) & EF_RISCV_RVC; + + // Find all relaxable relocations and record how many bytes we can save + // into r_deltas. + // + // Technically speaking, relaxing relocations may allow more relocations + // to be relaxed because the distance between a branch instruction and + // its target may decrease as a result of relaxation. That said, the + // number of such relocations is negligible (I tried to self-host mold + // on RISC-V as an experiment and found that the mold-built .text is + // only ~0.04% larger than that of GNU ld), so we don't bother to handle + // them. We scan relocations only once here. + tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (std::unique_ptr> &isec : file->sections) + if (is_resizable(isec.get())) + shrink_section(ctx, *isec, use_rvc); + }); + + // Fix symbol values. 
+ tbb::parallel_for_each(ctx.objs, [&](ObjectFile *file) { + for (Symbol *sym : file->symbols) { + if (sym->file != file) + continue; + + InputSection *isec = sym->get_input_section(); + if (!isec || isec->extra.r_deltas.empty()) + continue; + + std::span> rels = isec->get_rels(ctx); + auto it = std::lower_bound(rels.begin(), rels.end(), sym->value, + [&](const ElfRel &r, u64 val) { + return r.r_offset < val; + }); + + sym->value -= isec->extra.r_deltas[it - rels.begin()]; + } + }); + + // Recompute sizes of executable sections + tbb::parallel_for_each(ctx.chunks, [&](Chunk *chunk) { + if (chunk->to_osec() && (chunk->shdr.sh_flags & SHF_EXECINSTR)) + chunk->compute_section_size(ctx); + }); +} + +// Returns the distance between a relocated place and a symbol. +template <> +i64 compute_distance(Context &ctx, Symbol &sym, + InputSection &isec, const ElfRel &rel) { + // We handle absolute symbols as if they were infinitely far away + // because `shrink_section` may increase a distance between a branch + // instruction and an absolute symbol. Branching to an absolute + // location is extremely rare in real code, though. + if (sym.is_absolute()) + return INT64_MAX; + + // Likewise, relocations against weak undefined symbols won't be relaxed. + if (sym.esym().is_undef_weak()) + return INT64_MAX; + + // Compute a distance between the relocated place and the symbol. 
+ i64 S = sym.get_addr(ctx); + i64 A = rel.r_addend; + i64 P = isec.get_addr() + rel.r_offset; + return S + A - P; +} + +} // namespace mold + +#endif diff --git a/elf/subprocess.cc b/src/subprocess-unix.cc similarity index 88% rename from elf/subprocess.cc rename to src/subprocess-unix.cc index 51be8972..44e5e65a 100644 --- a/elf/subprocess.cc +++ b/src/subprocess-unix.cc @@ -1,5 +1,3 @@ -#if !defined(_WIN32) && !defined(__APPLE__) - #include "mold.h" #include "config.h" @@ -11,13 +9,15 @@ #include #include -namespace mold::elf { +namespace mold { #ifdef MOLD_X86_64 +static int pipe_write_fd = -1; + // Exiting from a program with large memory usage is slow -- // it may take a few hundred milliseconds. To hide the latency, // we fork a child and let it do the actual linking work. -std::function fork_child() { +void fork_child() { int pipefd[2]; if (pipe(pipefd) == -1) { perror("pipe"); @@ -50,12 +50,17 @@ std::function fork_child() { // Child close(pipefd[0]); + pipe_write_fd = pipefd[1]; +} + +void notify_parent() { + if (pipe_write_fd == -1) + return; - return [=] { - char buf[] = {1}; - [[maybe_unused]] int n = write(pipefd[1], buf, 1); - assert(n == 1); - }; + char buf[] = {1}; + [[maybe_unused]] int n = write(pipe_write_fd, buf, 1); + assert(n == 1); + pipe_write_fd = -1; } #endif @@ -84,6 +89,9 @@ static std::string find_dso(Context &ctx, std::filesystem::path self) { template [[noreturn]] void process_run_subcommand(Context &ctx, int argc, char **argv) { +#ifdef __APPLE__ + Fatal(ctx) << "-run is not supported on macOS"; +#else assert(argv[1] == "-run"s || argv[1] == "--run"s); if (!argv[2]) @@ -111,12 +119,11 @@ void process_run_subcommand(Context &ctx, int argc, char **argv) { // Execute a given command execvp(argv[2], argv + 2); Fatal(ctx) << "mold -run failed: " << argv[2] << ": " << errno_string(); +#endif } using E = MOLD_TARGET; template void process_run_subcommand(Context &, int, char **); -} // namespace mold::elf - -#endif +} // namespace mold diff 
--git a/src/subprocess-win32.cc b/src/subprocess-win32.cc new file mode 100644 index 00000000..fb336827 --- /dev/null +++ b/src/subprocess-win32.cc @@ -0,0 +1,20 @@ +#include "mold.h" + +namespace mold { + +#ifdef MOLD_X86_64 +void fork_child() {} +void notify_parent() {} +#endif + +template +[[noreturn]] +void process_run_subcommand(Context &ctx, int argc, char **argv) { + Fatal(ctx) << "-run is supported only on Unix"; +} + +using E = MOLD_TARGET; + +template void process_run_subcommand(Context &, int, char **); + +} // namespace mold diff --git a/elf/thunks.cc b/src/thunks.cc similarity index 92% rename from elf/thunks.cc rename to src/thunks.cc index 26b0d15c..c5a99fbc 100644 --- a/elf/thunks.cc +++ b/src/thunks.cc @@ -20,15 +20,14 @@ // we don't need to try too hard to reduce thunk size to the absolute // minimum. -#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 || \ - MOLD_LOONGARCH64 || MOLD_LOONGARCH32 +#if MOLD_ARM32 || MOLD_ARM64 || MOLD_PPC32 || MOLD_PPC64V1 || MOLD_PPC64V2 #include "mold.h" #include #include -namespace mold::elf { +namespace mold { using E = MOLD_TARGET; @@ -39,9 +38,7 @@ static consteval i64 max_distance() { // and therefore the least two bits are always zero. So the branch // operand is effectively 28 bits long. That means the branch range is // [-2^27, 2^27) or PC ± 128 MiB. - // - // LoongArch's BR instruction also takes a 26 bit immediate. - if (is_arm64 || is_loongarch) + if (is_arm64) return 1 << 27; // ARM32's Thumb branch has 24 bits immediate, and the instructions are @@ -179,6 +176,7 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { // haven't. for (InputSection *isec : m) isec->offset = -1; + thunks.clear(); // We create thunks from the beginning of the section to the end. // We manage progress using four offsets which increase monotonically. 
@@ -247,10 +245,8 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { // Scan relocations between B and C to collect symbols that need // entries in the new thunk. - tbb::parallel_for_each(m.begin() + b, m.begin() + c, - [&](InputSection *isec) { - scan_rels(ctx, *isec, *thunk, thunk_idx); - }); + for (i64 i = b; i < c; i++) + scan_rels(ctx, *m[i], *thunk, thunk_idx); // Now that we know the number of symbols in the thunk, we can compute // the thunk's size. @@ -270,16 +266,15 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { } // Scan relocations again to fix symbol offsets in the last thunk. - tbb::parallel_for_each(m.begin() + b, m.begin() + c, - [&](InputSection *isec) { - std::span *> syms = isec->file.symbols; - std::span> rels = isec->get_rels(ctx); - std::span thunk_refs = isec->extra.thunk_refs; - - for (i64 i = 0; i < rels.size(); i++) - if (thunk_refs[i].thunk_idx == thunk_idx) - thunk_refs[i].sym_idx = syms[rels[i].r_sym]->extra.thunk_sym_idx; - }); + for (i64 i = b; i < c; i++) { + std::span *> syms = m[i]->file.symbols; + std::span> rels = m[i]->get_rels(ctx); + std::span thunk_refs = m[i]->extra.thunk_refs; + + for (i64 j = 0; j < rels.size(); j++) + if (thunk_refs[j].thunk_idx == thunk_idx) + thunk_refs[j].sym_idx = syms[rels[j].r_sym]->extra.thunk_sym_idx; + } // Move B forward to point to the begining of the next batch. 
b = c; @@ -295,6 +290,6 @@ void OutputSection::create_range_extension_thunks(Context &ctx) { std::max(this->shdr.sh_addralign, 1 << isec->p2align); } -} // namespace mold::elf +} // namespace mold #endif diff --git a/elf/tls.cc b/src/tls.cc similarity index 88% rename from elf/tls.cc rename to src/tls.cc index 8d391ace..8d8476d2 100644 --- a/elf/tls.cc +++ b/src/tls.cc @@ -122,44 +122,26 @@ #include "mold.h" -namespace mold::elf { - -template -static ElfPhdr *get_tls_segment(Context &ctx) { - if (ctx.phdr) - for (ElfPhdr &phdr : ctx.phdr->phdrs) - if (phdr.p_type == PT_TLS) - return &phdr; - return nullptr; -} - -template -u64 get_tls_begin(Context &ctx) { - if (ElfPhdr *phdr = get_tls_segment(ctx)) - return phdr->p_vaddr; - return 0; -} +namespace mold { // Returns the TP address which can be used for efficient TLV accesses in // the main executable. TP at runtime refers to a per-process TLS block // whose address is not known at link-time. So the address returned from // this function is the TP if the TLS template image were a TLS block. template -u64 get_tp_addr(Context &ctx) { - ElfPhdr *phdr = get_tls_segment(ctx); - if (!phdr) - return 0; +u64 get_tp_addr(const ElfPhdr &phdr) { + assert(phdr.p_type == PT_TLS); if constexpr (is_x86 || is_sparc || is_s390x) { // On x86, SPARC and s390x, TP (%gs on i386, %fs on x86-64, %g7 on SPARC // and %a0/%a1 on s390x) refers to past the end of the TLS block for // historical reasons. TLVs are accessed with negative offsets from TP. - return align_to(phdr->p_vaddr + phdr->p_memsz, phdr->p_align); - } else if constexpr (is_arm || is_sh4 || is_alpha) { - // On ARM, SH4 and Alpha, the runtime appends two words at the beginning + return align_to(phdr.p_vaddr + phdr.p_memsz, phdr.p_align); + } else if constexpr (is_arm || is_sh4) { + // On ARM and SH4, the runtime appends two words at the beginning // of TLV template image when copying TLVs to the TLS block, so we need // to offset it. 
- return align_down(phdr->p_vaddr - sizeof(Word) * 2, phdr->p_align); + return align_down(phdr.p_vaddr - sizeof(Word) * 2, phdr.p_align); } else if constexpr (is_ppc || is_m68k) { // On PowerPC and m68k, TP is 0x7000 (28 KiB) past the beginning // of the TLV block to maximize the addressable range of load/store @@ -167,24 +149,22 @@ u64 get_tp_addr(Context &ctx) { // (32 KiB) off because there's a small implementation-defined piece of // data before the initial TLV block, and the runtime wants to access // them efficiently too. - return phdr->p_vaddr + 0x7000; + return phdr.p_vaddr + 0x7000; } else { // RISC-V and LoongArch just uses the beginning of the main executable's // TLV block as TP. Their load/store instructions usually take 12-bits // signed immediates, so the beginning of the TLS block ± 2 KiB is // accessible with a single load/store instruction. static_assert(is_riscv || is_loongarch); - return phdr->p_vaddr; + return phdr.p_vaddr; } } // Returns the address __tls_get_addr() would return if it's called // with offset 0. template -u64 get_dtp_addr(Context &ctx) { - ElfPhdr *phdr = get_tls_segment(ctx); - if (!phdr) - return 0; +u64 get_dtp_addr(const ElfPhdr &phdr) { + assert(phdr.p_type == PT_TLS); if constexpr (is_ppc || is_m68k) { // On PowerPC and m68k, R_DTPOFF is resolved to the address 0x8000 @@ -193,21 +173,20 @@ u64 get_dtp_addr(Context &ctx) { // immediates. That is, if the offset were right at the beginning of the // start of the TLS block, the half of addressible space (negative // immediates) would have been wasted. - return phdr->p_vaddr + 0x8000; + return phdr.p_vaddr + 0x8000; } else if constexpr (is_riscv) { // On RISC-V, the bias is 0x800 as the load/store instructions in the // ISA usually have a 12-bit immediate. - return phdr->p_vaddr + 0x800; + return phdr.p_vaddr + 0x800; } else { // On other targets, DTP simply refers to the beginning of the TLS block. 
- return phdr->p_vaddr; + return phdr.p_vaddr; } } using E = MOLD_TARGET; -template u64 get_tls_begin(Context &); -template u64 get_tp_addr(Context &); -template u64 get_dtp_addr(Context &); +template u64 get_tp_addr(const ElfPhdr &); +template u64 get_dtp_addr(const ElfPhdr &); -} // namespace mold::elf +} // namespace mold diff --git a/test/elf/CMakeLists.txt b/test/CMakeLists.txt similarity index 95% rename from test/elf/CMakeLists.txt rename to test/CMakeLists.txt index 69a0cdae..e64a1f0e 100644 --- a/test/elf/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,7 +31,7 @@ endif() if(MOLD_ENABLE_QEMU_TESTS) list(APPEND QEMU_ARCHS x86_64 i386 arm aarch64 ppc ppc64 ppc64le sparc64 sh4 s390x - alpha riscv64 riscv32 m68k loongarch64) + riscv64 riscv32 m68k loongarch64) LIST(APPEND TRIPLES x86_64-linux-gnu @@ -45,7 +45,6 @@ if(MOLD_ENABLE_QEMU_TESTS) sparc64-linux-gnu s390x-linux-gnu sh4-linux-gnu - alpha-linux-gnu riscv32-linux-gnu m68k-linux-gnu loongarch64-linux-gnu) @@ -69,10 +68,10 @@ function(add_target ARCH TRIPLE) file(GLOB ALL_TESTS RELATIVE ${CMAKE_CURRENT_LIST_DIR} CONFIGURE_DEPENDS "*.sh") - list(FILTER ALL_TESTS EXCLUDE REGEX "_") + list(FILTER ALL_TESTS EXCLUDE REGEX "^arch-") file(GLOB TESTS RELATIVE ${CMAKE_CURRENT_LIST_DIR} CONFIGURE_DEPENDS - "${ARCH}_*.sh") + "arch-${ARCH}-*.sh") list(APPEND TESTS ${ALL_TESTS}) @@ -168,10 +167,6 @@ if(${MACHINE} STREQUAL "sh4" OR (HAS_qemu-sh4 AND HAS_sh4-linux-gnu-gcc)) add_target(sh4 sh4-linux-gnu) endif() -if(${MACHINE} STREQUAL "alpha" OR (HAS_qemu-alpha AND HAS_alpha-linux-gnu-gcc)) - add_target(alpha alpha-linux-gnu) -endif() - if(${MACHINE} STREQUAL "m68k" OR (HAS_qemu-m68k AND HAS_m68k-linux-gnu-gcc)) add_target(m68k m68k-linux-gnu) endif() diff --git a/test/elf/abs-error.sh b/test/abs-error.sh similarity index 94% rename from test/elf/abs-error.sh rename to test/abs-error.sh index ca1cc1d7..65499c31 100755 --- a/test/elf/abs-error.sh +++ b/test/abs-error.sh @@ -5,7 +5,6 @@ [ $MACHINE = ppc64 ] && skip [ 
$MACHINE = ppc64le ] && skip [ $MACHINE = s390x ] && skip -[ $MACHINE = alpha ] && skip [[ $MACHINE = loongarch* ]] && skip cat < diff --git a/test/elf/aarch64_variant-pcs.sh b/test/arch-aarch64-variant-pcs.sh similarity index 100% rename from test/elf/aarch64_variant-pcs.sh rename to test/arch-aarch64-variant-pcs.sh diff --git a/test/elf/arm_abs-error.sh b/test/arch-arm-abs-error.sh similarity index 86% rename from test/elf/arm_abs-error.sh rename to test/arch-arm-abs-error.sh index fbc57d55..3a79c43c 100755 --- a/test/elf/arm_abs-error.sh +++ b/test/arch-arm-abs-error.sh @@ -12,5 +12,7 @@ extern char foo; int main() { printf("foo=%p\n", &foo); } EOF +$CC -o $t/exe -pie $t/a.o $t/b.o >& /dev/null && skip + ! $CC -B. -o $t/exe -pie $t/a.o $t/b.o >& $t/log grep -q 'recompile with -fPIC' $t/log diff --git a/test/elf/arm_range-extension-thunk-disassembly.sh b/test/arch-arm-range-extension-thunk-disassembly.sh similarity index 100% rename from test/elf/arm_range-extension-thunk-disassembly.sh rename to test/arch-arm-range-extension-thunk-disassembly.sh diff --git a/test/elf/arm_range-extension-thunk.sh b/test/arch-arm-range-extension-thunk.sh similarity index 100% rename from test/elf/arm_range-extension-thunk.sh rename to test/arch-arm-range-extension-thunk.sh diff --git a/test/elf/arm_thumb-interwork.sh b/test/arch-arm-thumb-interwork.sh similarity index 100% rename from test/elf/arm_thumb-interwork.sh rename to test/arch-arm-thumb-interwork.sh diff --git a/test/elf/arm_tlsdesc.sh b/test/arch-arm-tlsdesc.sh similarity index 100% rename from test/elf/arm_tlsdesc.sh rename to test/arch-arm-tlsdesc.sh diff --git a/test/elf/i686_tls-module-base.sh b/test/arch-i686-tls-module-base.sh similarity index 100% rename from test/elf/i686_tls-module-base.sh rename to test/arch-i686-tls-module-base.sh diff --git a/test/elf/i686_tlsdesc.sh b/test/arch-i686-tlsdesc.sh similarity index 100% rename from test/elf/i686_tlsdesc.sh rename to test/arch-i686-tlsdesc.sh diff --git 
a/test/elf/loongarch64_mcmodel-extreme.sh b/test/arch-loongarch64-mcmodel-extreme.sh similarity index 100% rename from test/elf/loongarch64_mcmodel-extreme.sh rename to test/arch-loongarch64-mcmodel-extreme.sh diff --git a/test/arch-loongarch64-relax-call36.sh b/test/arch-loongarch64-relax-call36.sh new file mode 100755 index 00000000..34e40982 --- /dev/null +++ b/test/arch-loongarch64-relax-call36.sh @@ -0,0 +1,52 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' | $CC -o $t/a.o -c -xassembler - +.globl foo, bar +.space 0x100000 +foo: + move $s0, $ra + .reloc ., R_LARCH_CALL36, print + .reloc ., R_LARCH_RELAX + pcaddu18i $t0, 0 + jirl $ra, $t0, 0 + move $ra, $s0 + ret +bar: + .reloc ., R_LARCH_CALL36, print + .reloc ., R_LARCH_RELAX + pcaddu18i $t0, 0 + jirl $zero, $t0, 0 +.space 0x100000 +EOF + +cat < + +void foo(); +void bar(); + +void print() { + printf("foo"); +} + +int main() { + foo(); + bar(); + printf("\n"); +} +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o -Wl,--no-relax +$QEMU $t/exe1 | grep -q foofoo + +$OBJDUMP -d $t/exe1 > $t/exe1.objdump +grep -A2 ':' $t/exe1.objdump | grep -wq pcaddu18i +grep -A2 ':' $t/exe1.objdump | grep -wq pcaddu18i + +$CC -B. -o $t/exe2 $t/a.o $t/b.o -Wl,--relax +$QEMU $t/exe2 | grep -q foofoo + +$OBJDUMP -d $t/exe2 > $t/exe2.objdump +grep -A2 ':' $t/exe2.objdump | grep -wq bl +grep -A2 ':' $t/exe2.objdump | grep -wq b diff --git a/test/arch-loongarch64-relax-got-load.sh b/test/arch-loongarch64-relax-got-load.sh new file mode 100755 index 00000000..279fa8b5 --- /dev/null +++ b/test/arch-loongarch64-relax-got-load.sh @@ -0,0 +1,33 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int get_foo(); +int main() { printf("%d\n", get_foo()); } +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -pie -Wl,--no-relax +$QEMU $t/exe1 | grep -q '^3$' +$OBJDUMP -d $t/exe1 | grep -A2 ':' | grep -Fqw pcalau12i +$OBJDUMP -d $t/exe1 | grep -A2 ':' | grep -Fqw ld.d + +$CC -B. 
-o $t/exe2 $t/a.o $t/b.o $t/c.o -pie -Wl,--relax +$QEMU $t/exe2 | grep -q '^3$' +$OBJDUMP -d $t/exe2 | grep -A1 ':' | grep -Fqw pcaddi + +$CC -B. -o $t/exe3 $t/a.o $t/b.o $t/c.o -pie -Wl,--relax \ + -Wl,-Ttext=0x1000000,-Tdata=0x2000000 + +$QEMU $t/exe3 | grep -q '^3$' +$OBJDUMP -d $t/exe3 | grep -A2 ':' | grep -Fqw pcalau12i +$OBJDUMP -d $t/exe3 | grep -A2 ':' | grep -Fqw addi.d diff --git a/test/arch-loongarch64-relax-pcala-addi.sh b/test/arch-loongarch64-relax-pcala-addi.sh new file mode 100755 index 00000000..fe26c73c --- /dev/null +++ b/test/arch-loongarch64-relax-pcala-addi.sh @@ -0,0 +1,58 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' | $CC -o $t/a.o -c -xassembler - +.globl get_sym1, get_sym2, get_sym3 +get_sym1: + la.pcrel $a0, sym1 + ret +get_sym2: + la.pcrel $a0, sym2 + ret +get_sym3: + la.pcrel $a0, sym3 + ret +EOF + +cat <<'EOF' | $CC -o $t/b.o -c -xassembler - +.globl sym1, sym2, sym3 +sym1: + li.d $a0, 1 + ret +.space 1024 * 1024 +sym2: + li.d $a0, 2 + ret +.space 1024 * 1024 +sym3: + li.d $a0, 3 + ret +EOF + +cat < + +int (*get_sym1())(); +int (*get_sym2())(); +int (*get_sym3())(); + +int main() { + printf("%d %d %d\n", get_sym1()(), get_sym2()(), get_sym3()()); +} +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o -Wl,--no-relax +$QEMU $t/exe1 | grep -q '^1 2 3$' + +$OBJDUMP -d $t/exe1 > $t/exe1.objdump +grep -A1 ':' $t/exe1.objdump | grep -q pcalau12i +grep -A1 ':' $t/exe1.objdump | grep -q pcalau12i +grep -A1 ':' $t/exe1.objdump | grep -q pcalau12i + +$CC -B. 
-o $t/exe2 $t/a.o $t/b.o $t/c.o -Wl,--relax +$QEMU $t/exe2 | grep -q '^1 2 3$' + +$OBJDUMP -d $t/exe2 > $t/exe2.objdump +grep -A1 ':' $t/exe2.objdump | grep -q pcaddi +grep -A1 ':' $t/exe2.objdump | grep -q pcaddi +grep -A1 ':' $t/exe2.objdump | grep -q pcalau12i diff --git a/test/arch-loongarch64-relax-tlsdesc.sh b/test/arch-loongarch64-relax-tlsdesc.sh new file mode 100755 index 00000000..37b44715 --- /dev/null +++ b/test/arch-loongarch64-relax-tlsdesc.sh @@ -0,0 +1,43 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <<'EOF' | $CC -o $t/a.o -c -xc - -fPIC +_Thread_local char foo[4] = "foo"; +_Thread_local char padding[100000] = "padding"; +EOF + +cat <<'EOF' | $CC -o $t/b.o -c -xc - -fPIC +_Thread_local char bar[4] = "bar"; +EOF + +cat <<'EOF' | $CC -o $t/c.o -c -xc - -fPIC -mtls-dialect=desc -O2 +extern _Thread_local char foo[4]; +extern _Thread_local char bar[4]; + +char *get_foo() { return foo; } +char *get_bar() { return bar; } +EOF + +cat < +char *get_foo(); +char *get_bar(); + +int main() { + printf("%s %s\n", get_foo(), get_bar()); +} +EOF + +$CC -B. -o $t/exe1 $t/a.o $t/b.o $t/c.o $t/d.o -Wl,--no-relax +$QEMU $t/exe1 | grep -q 'foo bar' + +$OBJDUMP -d $t/exe1 > $t/exe1.objdump +grep -A6 ':' $t/exe1.objdump | grep -Fq pcalau12i +grep -A6 ':' $t/exe1.objdump | grep -Fq pcalau12i + +$CC -B. 
-o $t/exe2 $t/a.o $t/b.o $t/c.o $t/d.o -Wl,--relax +$QEMU $t/exe2 | grep -q 'foo bar' + +$OBJDUMP -d $t/exe2 > $t/exe2.objdump +grep -A6 ':' $t/exe2.objdump | grep -Fq li.w +grep -A6 ':' $t/exe2.objdump | grep -Fq lu12i.w diff --git a/test/elf/ppc64le_save_restore_gprs.sh b/test/arch-ppc64le-save-restore-gprs.sh similarity index 100% rename from test/elf/ppc64le_save_restore_gprs.sh rename to test/arch-ppc64le-save-restore-gprs.sh diff --git a/test/elf/riscv64_attributes.sh b/test/arch-riscv64-attributes.sh similarity index 100% rename from test/elf/riscv64_attributes.sh rename to test/arch-riscv64-attributes.sh diff --git a/test/elf/riscv64_attributes2.sh b/test/arch-riscv64-attributes2.sh similarity index 100% rename from test/elf/riscv64_attributes2.sh rename to test/arch-riscv64-attributes2.sh diff --git a/test/elf/riscv64_global-pointer-dso.sh b/test/arch-riscv64-global-pointer-dso.sh similarity index 100% rename from test/elf/riscv64_global-pointer-dso.sh rename to test/arch-riscv64-global-pointer-dso.sh diff --git a/test/elf/riscv64_global-pointer.sh b/test/arch-riscv64-global-pointer.sh similarity index 100% rename from test/elf/riscv64_global-pointer.sh rename to test/arch-riscv64-global-pointer.sh diff --git a/test/elf/riscv64_norvc.sh b/test/arch-riscv64-norvc.sh similarity index 100% rename from test/elf/riscv64_norvc.sh rename to test/arch-riscv64-norvc.sh diff --git a/test/elf/riscv64_obj-compatible.sh b/test/arch-riscv64-obj-compatible.sh similarity index 100% rename from test/elf/riscv64_obj-compatible.sh rename to test/arch-riscv64-obj-compatible.sh diff --git a/test/elf/riscv64_relax-got.sh b/test/arch-riscv64-relax-got.sh similarity index 100% rename from test/elf/riscv64_relax-got.sh rename to test/arch-riscv64-relax-got.sh diff --git a/test/elf/riscv64_relax-hi20.sh b/test/arch-riscv64-relax-hi20.sh similarity index 92% rename from test/elf/riscv64_relax-hi20.sh rename to test/arch-riscv64-relax-hi20.sh index a0befcda..fb4774eb 100755 --- 
a/test/elf/riscv64_relax-hi20.sh +++ b/test/arch-riscv64-relax-hi20.sh @@ -46,3 +46,5 @@ $QEMU $t/exe1 | grep -q 'f00 10000f00 ba 11beef' $CC -B. -o $t/exe2 $t/a.o $t/b.o $t/c.o $QEMU $t/exe2 | grep -q 'f00 10000f00 ba 11beef' + +[ $(stat --format='%s' $t/exe1) -gt $(stat --format='%s' $t/exe2) ] diff --git a/test/elf/riscv64_weak-undef.sh b/test/arch-riscv64-weak-undef.sh similarity index 100% rename from test/elf/riscv64_weak-undef.sh rename to test/arch-riscv64-weak-undef.sh diff --git a/test/elf/s390x_got.sh b/test/arch-s390x-got.sh similarity index 76% rename from test/elf/s390x_got.sh rename to test/arch-s390x-got.sh index 60234e42..ac061d4f 100755 --- a/test/elf/s390x_got.sh +++ b/test/arch-s390x-got.sh @@ -10,9 +10,9 @@ extern char _DYNAMIC; extern void *got[]; int main() { - printf("%p %p\n", &_DYNAMIC, got[0]); + printf("%d %p %p\n", &_DYNAMIC == got[0], &_DYNAMIC, got[0]); } EOF $CC -B. -o $t/exe $t/a.o -Wl,-defsym=got=_GLOBAL_OFFSET_TABLE_ -no-pie -$QEMU $t/exe | grep -Eq '^(\S+) \1$' +$QEMU $t/exe | grep -Eq '^1' diff --git a/test/arch-x86_64-address-equality.sh b/test/arch-x86_64-address-equality.sh new file mode 100755 index 00000000..ccdf7528 --- /dev/null +++ b/test/arch-x86_64-address-equality.sh @@ -0,0 +1,28 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +void *foo(); +void bar(); +int main() { printf("%d %p %p\n", foo() == bar, foo(), bar); } +EOF + +cat <& /dev/null || skip mkdir -p $t/foo diff --git a/test/elf/x86_64_incompatible-libs.sh b/test/arch-x86_64-incompatible-libs.sh similarity index 100% rename from test/elf/x86_64_incompatible-libs.sh rename to test/arch-x86_64-incompatible-libs.sh diff --git a/test/elf/x86_64_incompatible-libs2.sh b/test/arch-x86_64-incompatible-libs2.sh similarity index 100% rename from test/elf/x86_64_incompatible-libs2.sh rename to test/arch-x86_64-incompatible-libs2.sh diff --git a/test/elf/x86_64_incompatible-obj.sh b/test/arch-x86_64-incompatible-obj.sh similarity index 100% rename from test/elf/x86_64_incompatible-obj.sh rename to test/arch-x86_64-incompatible-obj.sh diff --git a/test/elf/x86_64_init-array-readonly.sh b/test/arch-x86_64-init-array-readonly.sh similarity index 100% rename from test/elf/x86_64_init-array-readonly.sh rename to test/arch-x86_64-init-array-readonly.sh diff --git a/test/elf/x86_64_init-array.sh b/test/arch-x86_64-init-array.sh similarity index 100% rename from test/elf/x86_64_init-array.sh rename to test/arch-x86_64-init-array.sh diff --git a/test/arch-x86_64-isa-level.sh b/test/arch-x86_64-isa-level.sh new file mode 100755 index 00000000..d51afd8d --- /dev/null +++ b/test/arch-x86_64-isa-level.sh @@ -0,0 +1,17 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log -grep -Fq '01 .note.baz .note.foo .note.bar' $t/log +grep -Fq '01 .note.bar .note.baz .note.foo' $t/log ! 
grep -q 'NOTE.*0x0000000000000000 0x0000000000000000' $t/log || false diff --git a/test/elf/x86_64_note2.sh b/test/arch-x86_64-note2.sh similarity index 93% rename from test/elf/x86_64_note2.sh rename to test/arch-x86_64-note2.sh index 24ebef58..e2bb3036 100755 --- a/test/elf/x86_64_note2.sh +++ b/test/arch-x86_64-note2.sh @@ -29,4 +29,4 @@ EOF ./mold -o $t/exe $t/a.o $t/b.o $t/c.o $t/d.o readelf --segments $t/exe > $t/log -grep -Fq '01 .note.a .note.c .note.b' $t/log +grep -Fq '01 .note.a .note.b .note.c' $t/log diff --git a/test/elf/x86_64_plt.sh b/test/arch-x86_64-plt.sh similarity index 100% rename from test/elf/x86_64_plt.sh rename to test/arch-x86_64-plt.sh diff --git a/test/elf/x86_64_preinit-array.sh b/test/arch-x86_64-preinit-array.sh similarity index 100% rename from test/elf/x86_64_preinit-array.sh rename to test/arch-x86_64-preinit-array.sh diff --git a/test/elf/x86_64_relax.sh b/test/arch-x86_64-relax.sh similarity index 100% rename from test/elf/x86_64_relax.sh rename to test/arch-x86_64-relax.sh diff --git a/test/elf/x86_64_reloc-overflow.sh b/test/arch-x86_64-reloc-overflow.sh similarity index 100% rename from test/elf/x86_64_reloc-overflow.sh rename to test/arch-x86_64-reloc-overflow.sh diff --git a/test/elf/x86_64_reloc-zero.sh b/test/arch-x86_64-reloc-zero.sh similarity index 100% rename from test/elf/x86_64_reloc-zero.sh rename to test/arch-x86_64-reloc-zero.sh diff --git a/test/elf/x86_64_reloc.sh b/test/arch-x86_64-reloc.sh similarity index 100% rename from test/elf/x86_64_reloc.sh rename to test/arch-x86_64-reloc.sh diff --git a/test/elf/x86_64_section-alignment.sh b/test/arch-x86_64-section-alignment.sh similarity index 100% rename from test/elf/x86_64_section-alignment.sh rename to test/arch-x86_64-section-alignment.sh diff --git a/test/elf/x86_64_section-name.sh b/test/arch-x86_64-section-name.sh similarity index 100% rename from test/elf/x86_64_section-name.sh rename to test/arch-x86_64-section-name.sh diff --git 
a/test/arch-x86_64-tbss-only.sh b/test/arch-x86_64-tbss-only.sh new file mode 100755 index 00000000..6ebdb453 --- /dev/null +++ b/test/arch-x86_64-tbss-only.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +# Test if grep supports backreferences +echo abab | grep -Eq '(ab)\1' || skip + +cat <&1 | grep -q 'may cause a segmentation fault' +$GCC -B. -o $t/exe $t/a.o $t/b.o 2>&1 | grep -Eq 'may cause a segmentation fault|requires executable stack' diff --git a/test/elf/x86_64_warn-shared-textrel.sh b/test/arch-x86_64-warn-shared-textrel.sh similarity index 100% rename from test/elf/x86_64_warn-shared-textrel.sh rename to test/arch-x86_64-warn-shared-textrel.sh diff --git a/test/elf/x86_64_warn-textrel.sh b/test/arch-x86_64-warn-textrel.sh similarity index 100% rename from test/elf/x86_64_warn-textrel.sh rename to test/arch-x86_64-warn-textrel.sh diff --git a/test/elf/x86_64_z-ibt.sh b/test/arch-x86_64-z-ibt.sh similarity index 100% rename from test/elf/x86_64_z-ibt.sh rename to test/arch-x86_64-z-ibt.sh diff --git a/test/elf/x86_64_z-ibtplt.sh b/test/arch-x86_64-z-ibtplt.sh similarity index 100% rename from test/elf/x86_64_z-ibtplt.sh rename to test/arch-x86_64-z-ibtplt.sh diff --git a/test/elf/x86_64_endbr.sh b/test/arch-x86_64-z-rewrite-endbr.sh similarity index 100% rename from test/elf/x86_64_endbr.sh rename to test/arch-x86_64-z-rewrite-endbr.sh diff --git a/test/elf/x86_64_endbr2.sh b/test/arch-x86_64-z-rewrite-endbr2.sh similarity index 100% rename from test/elf/x86_64_endbr2.sh rename to test/arch-x86_64-z-rewrite-endbr2.sh diff --git a/test/arch-x86_64-z-rewrite-endbr3.sh b/test/arch-x86_64-z-rewrite-endbr3.sh new file mode 100755 index 00000000..f8358542 --- /dev/null +++ b/test/arch-x86_64-z-rewrite-endbr3.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +test_cflags -fcf-protection || skip +[ "$QEMU" == '' ] || skip + +# Check if Intel SDE CPU emulator is available +command -v sde >& /dev/null || skip +sde --help | grep -q 'Software Development Emulator' || skip + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC -B. -o $t/exe $t/a.o -Wl,-z,rewrite-endbr +sde -cet 1 -- $t/exe | grep -q 'Hello world' diff --git a/test/elf/x86_64_z-shstk.sh b/test/arch-x86_64-z-shstk.sh similarity index 100% rename from test/elf/x86_64_z-shstk.sh rename to test/arch-x86_64-z-shstk.sh diff --git a/test/elf/x86_64_z-text.sh b/test/arch-x86_64-z-text.sh similarity index 100% rename from test/elf/x86_64_z-text.sh rename to test/arch-x86_64-z-text.sh diff --git a/test/elf/as-needed-dso.sh b/test/as-needed-dso.sh similarity index 93% rename from test/elf/as-needed-dso.sh rename to test/as-needed-dso.sh index 40f0a46b..60fd6bd6 100755 --- a/test/elf/as-needed-dso.sh +++ b/test/as-needed-dso.sh @@ -18,4 +18,4 @@ EOF $CC -B. -o $t/exe $t/a.o -L$t -Wl,--as-needed -lbar -lfoo readelf -W --dynamic $t/exe > $t/log2 grep -q libbar $t/log2 -! grep -q libfoo $t/log2 || false +grep -q libfoo $t/log2 diff --git a/test/elf/as-needed-dso2.sh b/test/as-needed-dso2.sh similarity index 100% rename from test/elf/as-needed-dso2.sh rename to test/as-needed-dso2.sh diff --git a/test/elf/as-needed-weak.sh b/test/as-needed-weak.sh similarity index 77% rename from test/elf/as-needed-weak.sh rename to test/as-needed-weak.sh index 112561fc..fc432300 100755 --- a/test/elf/as-needed-weak.sh +++ b/test/as-needed-weak.sh @@ -18,14 +18,14 @@ cat < $t/log1 grep -Fq 'Shared library: [libfoo.so]' $t/log1 grep -Fq 'Shared library: [libbar.so]' $t/log1 -$CC -o $t/exe2 $t/a.o -Wl,-as-needed -L$t -lbar -lfoo +$CC -B. -o $t/exe2 $t/a.o -Wl,-as-needed -L$t -lbar -lfoo readelf --dynamic $t/exe2 > $t/log2 -! grep -Fq 'Shared library: [libfoo.so]' $t/log2 || false +grep -Fq 'Shared library: [libfoo.so]' $t/log2 ! 
grep -Fq 'Shared library: [libbar.so]' $t/log2 || false diff --git a/test/elf/as-needed.sh b/test/as-needed.sh similarity index 60% rename from test/elf/as-needed.sh rename to test/as-needed.sh index b0389c27..6d5448c8 100755 --- a/test/elf/as-needed.sh +++ b/test/as-needed.sh @@ -18,12 +18,12 @@ EOF $CC -B. -o $t/exe $t/a.o -Wl,--no-as-needed $t/b.so $t/c.so -readelf --dynamic $t/exe > $t/readelf -grep -Fq 'Shared library: [libfoo.so]' $t/readelf -grep -Fq 'Shared library: [libbar.so]' $t/readelf +readelf --dynamic $t/exe > $t/log +grep -Fq 'Shared library: [libfoo.so]' $t/log +grep -Fq 'Shared library: [libbar.so]' $t/log $CC -B. -o $t/exe $t/a.o -Wl,--as-needed $t/b.so $t/c.so -readelf --dynamic $t/exe > $t/readelf -grep -Fq 'Shared library: [libfoo.so]' $t/readelf -! grep -Fq 'Shared library: [libbar.so]' $t/readelf || false +readelf --dynamic $t/exe > $t/log +grep -Fq 'Shared library: [libfoo.so]' $t/log +! grep -Fq 'Shared library: [libbar.so]' $t/log || false diff --git a/test/elf/auxiliary.sh b/test/auxiliary.sh similarity index 100% rename from test/elf/auxiliary.sh rename to test/auxiliary.sh diff --git a/test/elf/bno-symbolic.sh b/test/bno-symbolic.sh similarity index 100% rename from test/elf/bno-symbolic.sh rename to test/bno-symbolic.sh diff --git a/test/elf/bsymbolic-functions.sh b/test/bsymbolic-functions.sh similarity index 100% rename from test/elf/bsymbolic-functions.sh rename to test/bsymbolic-functions.sh diff --git a/test/elf/bsymbolic-non-weak-functions.sh b/test/bsymbolic-non-weak-functions.sh similarity index 100% rename from test/elf/bsymbolic-non-weak-functions.sh rename to test/bsymbolic-non-weak-functions.sh diff --git a/test/elf/bsymbolic-non-weak.sh b/test/bsymbolic-non-weak.sh similarity index 100% rename from test/elf/bsymbolic-non-weak.sh rename to test/bsymbolic-non-weak.sh diff --git a/test/elf/bsymbolic.sh b/test/bsymbolic.sh similarity index 100% rename from test/elf/bsymbolic.sh rename to test/bsymbolic.sh diff --git 
a/test/elf/build-id.sh b/test/build-id.sh similarity index 87% rename from test/elf/build-id.sh rename to test/build-id.sh index acff861e..d2310925 100755 --- a/test/elf/build-id.sh +++ b/test/build-id.sh @@ -18,5 +18,8 @@ readelf -n $t/exe | grep -q 'GNU.*0x00000014.*NT_GNU_BUILD_ID' $CC -B. -o $t/exe $t/a.c -Wl,-build-id=sha256 readelf -n $t/exe | grep -q 'GNU.*0x00000020.*NT_GNU_BUILD_ID' +$CC -B. -o $t/exe $t/a.c -Wl,-build-id=fast +readelf -n $t/exe | grep -q 'GNU.*0x00000020.*NT_GNU_BUILD_ID' + $CC -B. -o $t/exe $t/a.c -Wl,-build-id=0xdeadbeefdeadbeef readelf -n $t/exe | grep -q 'Build ID: deadbeefdeadbeef' diff --git a/test/elf/canonical-plt.sh b/test/canonical-plt.sh similarity index 100% rename from test/elf/canonical-plt.sh rename to test/canonical-plt.sh diff --git a/test/elf/cmdline.sh b/test/cmdline.sh similarity index 100% rename from test/elf/cmdline.sh rename to test/cmdline.sh diff --git a/test/elf/color-diagnostics.sh b/test/color-diagnostics.sh similarity index 100% rename from test/elf/color-diagnostics.sh rename to test/color-diagnostics.sh diff --git a/test/elf/comment.sh b/test/comment.sh similarity index 100% rename from test/elf/comment.sh rename to test/comment.sh diff --git a/test/elf/common-archive.sh b/test/common-archive.sh similarity index 100% rename from test/elf/common-archive.sh rename to test/common-archive.sh diff --git a/test/elf/common-ref.sh b/test/common-ref.sh similarity index 100% rename from test/elf/common-ref.sh rename to test/common-ref.sh diff --git a/test/elf/common.sh b/test/common-symbols.sh similarity index 100% rename from test/elf/common.sh rename to test/common-symbols.sh diff --git a/test/elf/common.inc b/test/common.inc similarity index 74% rename from test/elf/common.inc rename to test/common.inc index 5200ff31..fdad9f27 100644 --- a/test/elf/common.inc +++ b/test/common.inc @@ -6,6 +6,7 @@ export LC_ALL=C canonical_name() { case $1 in i?86) echo i686 ;; + amd64) echo x86_64 ;; arm*) echo arm ;; powerpc) 
echo ppc ;; powerpc64) echo ppc64 ;; @@ -20,7 +21,7 @@ fi # Set tool names if [ -z "$TRIPLE" ]; then - TESTDIR=out/test/elf/$MACHINE + TESTDIR=out/test/$MACHINE CC="${TEST_CC:-cc}" CXX="${TEST_CXX:-c++}" GCC="${TEST_GCC:-gcc}" @@ -31,7 +32,7 @@ if [ -z "$TRIPLE" ]; then QEMU= elif [ "$TRIPLE" = powerpc64le-linux-gnu -a "$CPU" = power10 ]; then MACHINE=ppc64le - TESTDIR=out/test/elf/ppc64le-power10 + TESTDIR=out/test/ppc64le-power10 CC="${TEST_CC:-$TRIPLE-gcc} -mcpu=power10" CXX="${TEST_CXX:-$TRIPLE-g++} -mcpu=power10" GCC="${TEST_GCC:-$TRIPLE-gcc} -mcpu=power10" @@ -42,7 +43,7 @@ elif [ "$TRIPLE" = powerpc64le-linux-gnu -a "$CPU" = power10 ]; then QEMU="qemu-ppc64le -L /usr/$TRIPLE -cpu power10" else MACHINE=$(canonical_name $(echo $TRIPLE | sed 's/-.*//')) - TESTDIR=out/test/elf/$MACHINE + TESTDIR=out/test/$MACHINE CC="${TEST_CC:-$TRIPLE-gcc}" CXX="${TEST_CXX:-$TRIPLE-g++}" GCC="${TEST_GCC:-$TRIPLE-gcc}" @@ -58,19 +59,26 @@ else fi fi -if [ $MACHINE = x86_64 -o $MACHINE = i686 -o $MACHINE = arm ]; then - tlsdesc_opt=-mtls-dialect=gnu2 -elif [ $MACHINE = aarch64 ]; then - tlsdesc_opt=-mtls-dialect=desc +case $MACHINE in +x86_64 | i686 | arm) + tlsdesc_opt=-mtls-dialect=gnu2 ;; +aarch64 | loongarch*) + tlsdesc_opt=-mtls-dialect=desc ;; +esac + +# We want to use GNU's binutils even on BSDs. `pkg install binutils` +# installs GNU binutils under /usr/local/bin. +if [ "$(uname)" = FreeBSD ]; then + export PATH="/usr/local/bin:$PATH" fi # Common functions test_cflags() { - echo 'int main() {}' | $CC "$@" -o /dev/null -xc - >& /dev/null + echo 'int main() {}' | $CC -B. "$@" -o /dev/null -xc - >& /dev/null } test_cxxflags() { - echo 'int main() {}' | $CXX "$@" -o /dev/null -xc++ - >& /dev/null + echo 'int main() {}' | $CXX -B. 
"$@" -o /dev/null -xc++ - >& /dev/null } is_musl() { @@ -87,7 +95,10 @@ supports_tlsdesc() { # musl's tlsdesc on arm32 seems to be broken [ $MACHINE = arm ] && is_musl && return 1 - [ -n "$tlsdesc_opt" ] + # FreeBSD's loader doesn't seem to support TLSDESC relocs in an executable + [ "$(uname)" = FreeBSD ] && return 1 + + [ "$tlsdesc_opt" != '' ] } on_qemu() { @@ -120,3 +131,4 @@ testname=$(basename "$0" .sh) echo -n "Testing $testname ... " t=$TESTDIR/$testname mkdir -p $t +set -x diff --git a/test/elf/compress-debug-sections-zstd.sh b/test/compress-debug-sections-zstd.sh similarity index 100% rename from test/elf/compress-debug-sections-zstd.sh rename to test/compress-debug-sections-zstd.sh diff --git a/test/elf/compress-debug-sections.sh b/test/compress-debug-sections.sh similarity index 100% rename from test/elf/compress-debug-sections.sh rename to test/compress-debug-sections.sh diff --git a/test/elf/compressed-debug-info.sh b/test/compressed-debug-info.sh similarity index 100% rename from test/elf/compressed-debug-info.sh rename to test/compressed-debug-info.sh diff --git a/test/elf/copyrel-alignment.sh b/test/copyrel-alignment.sh similarity index 96% rename from test/elf/copyrel-alignment.sh rename to test/copyrel-alignment.sh index 4b265ac7..432179bd 100755 --- a/test/elf/copyrel-alignment.sh +++ b/test/copyrel-alignment.sh @@ -3,7 +3,6 @@ [ $MACHINE = ppc64 ] && skip [ $MACHINE = ppc64le ] && skip -[ $MACHINE = alpha ] && skip [[ $MACHINE = loongarch* ]] && skip cat <& $t/log -no-pie || false -grep -Fq 'cannot make copy relocation for protected symbol' $t/log +grep -Fq 'cannot create a copy relocation for protected symbol' $t/log diff --git a/test/elf/copyrel-relro.sh b/test/copyrel-relro.sh similarity index 100% rename from test/elf/copyrel-relro.sh rename to test/copyrel-relro.sh diff --git a/test/elf/copyrel-relro2.sh b/test/copyrel-relro2.sh similarity index 100% rename from test/elf/copyrel-relro2.sh rename to test/copyrel-relro2.sh diff --git 
a/test/elf/copyrel.sh b/test/copyrel.sh similarity index 100% rename from test/elf/copyrel.sh rename to test/copyrel.sh diff --git a/test/elf/ctors-in-init-array.sh b/test/ctors-in-init-array.sh similarity index 100% rename from test/elf/ctors-in-init-array.sh rename to test/ctors-in-init-array.sh diff --git a/test/elf/dead-debug-sections.sh b/test/dead-debug-sections.sh similarity index 100% rename from test/elf/dead-debug-sections.sh rename to test/dead-debug-sections.sh diff --git a/test/elf/debug-macro-section.sh b/test/debug-macro-section.sh similarity index 100% rename from test/elf/debug-macro-section.sh rename to test/debug-macro-section.sh diff --git a/test/elf/default-symver.sh b/test/default-symver.sh similarity index 100% rename from test/elf/default-symver.sh rename to test/default-symver.sh diff --git a/test/elf/defsym-lto.sh b/test/defsym-lto.sh similarity index 79% rename from test/elf/defsym-lto.sh rename to test/defsym-lto.sh index 3848384b..d60b83df 100755 --- a/test/elf/defsym-lto.sh +++ b/test/defsym-lto.sh @@ -1,8 +1,7 @@ #!/bin/bash . 
$(dirname $0)/common.inc -echo 'int main() {}' | $CC -flto -o /dev/null -xc - >& /dev/null \ - || skip +test_cflags -flto || skip cat < diff --git a/test/elf/defsym-missing-symbol.sh b/test/defsym-missing-symbol.sh similarity index 100% rename from test/elf/defsym-missing-symbol.sh rename to test/defsym-missing-symbol.sh diff --git a/test/elf/defsym.sh b/test/defsym.sh similarity index 100% rename from test/elf/defsym.sh rename to test/defsym.sh diff --git a/test/elf/defsym2.sh b/test/defsym2.sh similarity index 100% rename from test/elf/defsym2.sh rename to test/defsym2.sh diff --git a/test/elf/demangle-cpp.sh b/test/demangle-cpp.sh similarity index 100% rename from test/elf/demangle-cpp.sh rename to test/demangle-cpp.sh diff --git a/test/elf/demangle-rust.sh b/test/demangle-rust.sh similarity index 100% rename from test/elf/demangle-rust.sh rename to test/demangle-rust.sh diff --git a/test/elf/demangle.sh b/test/demangle.sh similarity index 100% rename from test/elf/demangle.sh rename to test/demangle.sh diff --git a/test/elf/dependency-file-response-file.sh b/test/dependency-file-response-file.sh similarity index 100% rename from test/elf/dependency-file-response-file.sh rename to test/dependency-file-response-file.sh diff --git a/test/elf/dependency-file.sh b/test/dependency-file.sh similarity index 100% rename from test/elf/dependency-file.sh rename to test/dependency-file.sh diff --git a/test/elf/disable-new-dtags.sh b/test/disable-new-dtags.sh similarity index 100% rename from test/elf/disable-new-dtags.sh rename to test/disable-new-dtags.sh diff --git a/test/elf/discard.sh b/test/discard.sh similarity index 91% rename from test/elf/discard.sh rename to test/discard.sh index e419838b..b7628c7e 100755 --- a/test/elf/discard.sh +++ b/test/discard.sh @@ -1,7 +1,8 @@ #!/bin/bash . 
$(dirname $0)/common.inc -[ $MACHINE = riscv64 -o $MACHINE = riscv32 ] && skip +[[ $MACHINE = riscv* ]] && skip +[[ $MACHINE = loongarch* ]] && skip cat < $t/log +grep -wq foo $t/log +! grep -wq bar $t/log || false diff --git a/test/elf/dynamic-list.sh b/test/dynamic-list.sh similarity index 100% rename from test/elf/dynamic-list.sh rename to test/dynamic-list.sh diff --git a/test/elf/dynamic-list2.sh b/test/dynamic-list2.sh similarity index 100% rename from test/elf/dynamic-list2.sh rename to test/dynamic-list2.sh diff --git a/test/elf/dynamic-list3.sh b/test/dynamic-list3.sh similarity index 100% rename from test/elf/dynamic-list3.sh rename to test/dynamic-list3.sh diff --git a/test/elf/dynamic-list4.sh b/test/dynamic-list4.sh similarity index 100% rename from test/elf/dynamic-list4.sh rename to test/dynamic-list4.sh diff --git a/test/elf/dynamic.sh b/test/dynamic.sh similarity index 84% rename from test/elf/dynamic.sh rename to test/dynamic.sh index ce207c6a..2b9576c2 100755 --- a/test/elf/dynamic.sh +++ b/test/dynamic.sh @@ -9,7 +9,7 @@ readelf --dynamic $t/exe > $t/log grep -Eq 'Shared library:.*\blibc\b' $t/log readelf -W --dyn-syms --use-dynamic $t/exe > $t/log2 -grep -Eq 'FUNC\s+GLOBAL\s+DEFAULT.*UND\s+__libc_start_main' $t/log2 +grep -Eq 'FUNC\s+GLOBAL\s+DEFAULT.*UND\s+__libc_start' $t/log2 cat < diff --git a/test/elf/mold-jobs.sh b/test/elf/mold-jobs.sh deleted file mode 100755 index 46af6628..00000000 --- a/test/elf/mold-jobs.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -# Looks like lockf doesn't work correctly on qemu-riscv64 -[ $MACHINE = riscv64 -o $MACHINE = riscv32 ] && skip - -cat < -int main() { - printf("Hello world\n"); -} -EOF - -for i in `seq 1 20`; do - rm -f $t/exe$i - ( MOLD_JOBS=2 $CC -B. 
-o $t/exe$i $t/a.o -no-pie; echo $i) & -done - -wait - -for i in `seq 1 20`; do - $QEMU $t/exe$i | grep -q 'Hello world' -done diff --git a/test/elf/now.sh b/test/elf/now.sh deleted file mode 100755 index 37b83d26..00000000 --- a/test/elf/now.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -cat < - -void foo() { - printf("Hello world\n"); -} -EOF - -$CC -B. -shared -o $t/b.so $t/a.o -Wl,-z,now -readelf --dynamic $t/b.so | grep -q 'Flags: NOW' - -$CC -B. -shared -o $t/b.so $t/a.o -Wl,-z,now,-z,lazy -readelf --dynamic $t/b.so > $t/log -! grep -q 'Flags: NOW' $t/log || false diff --git a/test/elf/pack-dyn-relocs-relr.sh b/test/elf/pack-dyn-relocs-relr.sh deleted file mode 100755 index c2cad3f8..00000000 --- a/test/elf/pack-dyn-relocs-relr.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -[ $MACHINE = m68k ] && skip -[ $MACHINE = ppc ] && skip - -command -v llvm-readelf >& /dev/null || skip - -cat < -int main() { - printf("Hello world\n"); -} -EOF - -$CC -B. -o $t/exe1 $t/a.o -pie -llvm-readelf -r $t/exe1 | grep RELATIVE | wc -l > $t/log1 - -$CC -B. -o $t/exe2 $t/a.o -pie -Wl,-pack-dyn-relocs=relr -llvm-readelf -r $t/exe2 | grep RELATIVE | wc -l > $t/log2 - -diff $t/log1 $t/log2 - -llvm-readelf --dynamic $t/exe2 > $t/log3 -grep -wq RELR $t/log3 -grep -wq RELRSZ $t/log3 -grep -wq RELRENT $t/log3 diff --git a/test/elf/package-metadata.sh b/test/elf/package-metadata.sh deleted file mode 100755 index 4c673bc5..00000000 --- a/test/elf/package-metadata.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -cat < -int main() { - printf("Hello world\n"); -} -EOF - -$CC -B. 
-o $t/exe $t/a.o -Wl,-package-metadata='{"foo":"bar"}' -readelf -x .note.package $t/exe | grep -Fq '{"foo":"bar"}' diff --git a/test/elf/relocatable-no-ehframe.sh b/test/elf/relocatable-no-ehframe.sh deleted file mode 100755 index d7c2e1a6..00000000 --- a/test/elf/relocatable-no-ehframe.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -[ $MACHINE = alpha ] && skip - -# OneTBB isn't tsan-clean -nm mold | grep -q '__tsan_init' && skip - -cat < $t/log1 -! grep -Fq .eh_frame $t/log1 || false - -./mold --relocatable -o $t/b.o $t/a.o -readelf -WS $t/b.o > $t/log2 -! grep -Fq .eh_frame $t/log2 || false diff --git a/test/elf/shared-abs-sym.sh b/test/elf/shared-abs-sym.sh deleted file mode 100755 index f462130c..00000000 --- a/test/elf/shared-abs-sym.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -cat < -extern char foo; -int main() { printf("foo=%p\n", &foo); } -EOF - -# This test fails with older glibc -$CC -B. -o $t/exe1 -pie $t/c.o $t/a.so 2> /dev/null || skip -$QEMU $t/exe1 | grep -q 'foo=0x3' || skip -LD_PRELOAD=$t/b.so $QEMU $t/exe1 | grep -q 'foo=0x5' - -$CC -B. -o $t/exe2 -pie $t/c.o $t/a.so -$QEMU $t/exe2 | grep -q 'foo=0x3' -LD_PRELOAD=$t/b.so $QEMU $t/exe2 | grep -q 'foo=0x5' - -$CC -B. -o $t/exe3 -no-pie $t/c.o $t/a.so -$QEMU $t/exe3 | grep -q 'foo=0x3' -LD_PRELOAD=$t/b.so $QEMU $t/exe3 | grep -q 'foo=0x5' diff --git a/test/elf/z-pack-relative-relocs.sh b/test/elf/z-pack-relative-relocs.sh deleted file mode 100755 index e09d441e..00000000 --- a/test/elf/z-pack-relative-relocs.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -. $(dirname $0)/common.inc - -cat < -int main() { - printf("Hello world\n"); -} -EOF - -$CC -B. -o $t/exe $t/a.o -pie -Wl,-z,pack-relative-relocs - -readelf -W -V $t/exe > $t/log -grep -Fq GLIBC_2. 
$t/log || skip - -grep -q GLIBC_ABI_DT_RELR $t/log diff --git a/test/elf/emit-relocs-cpp.sh b/test/emit-relocs-cpp.sh similarity index 100% rename from test/elf/emit-relocs-cpp.sh rename to test/emit-relocs-cpp.sh diff --git a/test/elf/emit-relocs-dead-sections.sh b/test/emit-relocs-dead-sections.sh similarity index 100% rename from test/elf/emit-relocs-dead-sections.sh rename to test/emit-relocs-dead-sections.sh diff --git a/test/elf/emit-relocs.sh b/test/emit-relocs.sh similarity index 100% rename from test/elf/emit-relocs.sh rename to test/emit-relocs.sh diff --git a/test/empty-arg.sh b/test/empty-arg.sh new file mode 100755 index 00000000..60182b07 --- /dev/null +++ b/test/empty-arg.sh @@ -0,0 +1,5 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +! ./mold -m elf_x86_64 '' >& $t/log +grep -q 'cannot open :' $t/log diff --git a/test/elf/empty-file.sh b/test/empty-file.sh similarity index 100% rename from test/elf/empty-file.sh rename to test/empty-file.sh diff --git a/test/elf/empty-input.sh b/test/empty-input.sh similarity index 100% rename from test/elf/empty-input.sh rename to test/empty-input.sh diff --git a/test/elf/empty-version.sh b/test/empty-version.sh similarity index 100% rename from test/elf/empty-version.sh rename to test/empty-version.sh diff --git a/test/elf/entry.sh b/test/entry.sh similarity index 100% rename from test/elf/entry.sh rename to test/entry.sh diff --git a/test/elf/exception-multiple-ehframe.sh b/test/exception-multiple-ehframe.sh similarity index 96% rename from test/elf/exception-multiple-ehframe.sh rename to test/exception-multiple-ehframe.sh index 1b9f434a..c411eb92 100755 --- a/test/elf/exception-multiple-ehframe.sh +++ b/test/exception-multiple-ehframe.sh @@ -5,7 +5,6 @@ nm mold | grep -q '__tsan_init' && skip command -v perl > /dev/null || skip -[ $MACHINE = m68k ] && skip [ $MACHINE = sh4 ] && skip cat < $t/log ! grep -Fq bar $t/log || false grep -Fq baz $t/log +$CC -B. 
-shared -o $t/f.so $t/e.o $t/c.a $t/d.a -Wl,-exclude-libs=c.a:d.a +readelf --dyn-syms $t/f.so > $t/log +! grep -Fq foo $t/log || false +! grep -Fq bar $t/log || false +grep -Fq baz $t/log + $CC -B. -shared -o $t/f.so $t/e.o $t/c.a $t/d.a -Wl,-exclude-libs=ALL readelf --dyn-syms $t/f.so > $t/log ! grep -Fq foo $t/log || false diff --git a/test/elf/exclude-libs2.sh b/test/exclude-libs2.sh similarity index 100% rename from test/elf/exclude-libs2.sh rename to test/exclude-libs2.sh diff --git a/test/elf/exclude-libs3.sh b/test/exclude-libs3.sh similarity index 100% rename from test/elf/exclude-libs3.sh rename to test/exclude-libs3.sh diff --git a/test/elf/execstack.sh b/test/execstack.sh similarity index 100% rename from test/elf/execstack.sh rename to test/execstack.sh diff --git a/test/elf/execute-only.sh b/test/execute-only.sh similarity index 100% rename from test/elf/execute-only.sh rename to test/execute-only.sh diff --git a/test/elf/export-dynamic.sh b/test/export-dynamic.sh similarity index 100% rename from test/elf/export-dynamic.sh rename to test/export-dynamic.sh diff --git a/test/elf/export-from-exe.sh b/test/export-from-exe.sh similarity index 100% rename from test/elf/export-from-exe.sh rename to test/export-from-exe.sh diff --git a/test/elf/fatal-warnings.sh b/test/fatal-warnings.sh similarity index 100% rename from test/elf/fatal-warnings.sh rename to test/fatal-warnings.sh diff --git a/test/elf/filler.sh b/test/filler.sh similarity index 100% rename from test/elf/filler.sh rename to test/filler.sh diff --git a/test/elf/filter.sh b/test/filter.sh similarity index 100% rename from test/elf/filter.sh rename to test/filter.sh diff --git a/test/elf/func-addr.sh b/test/func-addr.sh similarity index 100% rename from test/elf/func-addr.sh rename to test/func-addr.sh diff --git a/test/elf/gc-sections.sh b/test/gc-sections.sh similarity index 100% rename from test/elf/gc-sections.sh rename to test/gc-sections.sh diff --git a/test/elf/gdb-index-compress-output.sh 
b/test/gdb-index-compress-output.sh similarity index 100% rename from test/elf/gdb-index-compress-output.sh rename to test/gdb-index-compress-output.sh diff --git a/test/elf/gdb-index-dwarf2.sh b/test/gdb-index-dwarf2.sh similarity index 100% rename from test/elf/gdb-index-dwarf2.sh rename to test/gdb-index-dwarf2.sh diff --git a/test/elf/gdb-index-dwarf3.sh b/test/gdb-index-dwarf3.sh similarity index 100% rename from test/elf/gdb-index-dwarf3.sh rename to test/gdb-index-dwarf3.sh diff --git a/test/elf/gdb-index-dwarf4.sh b/test/gdb-index-dwarf4.sh similarity index 100% rename from test/elf/gdb-index-dwarf4.sh rename to test/gdb-index-dwarf4.sh diff --git a/test/elf/gdb-index-dwarf5.sh b/test/gdb-index-dwarf5.sh similarity index 86% rename from test/elf/gdb-index-dwarf5.sh rename to test/gdb-index-dwarf5.sh index b7ec1af5..1f3ebc84 100755 --- a/test/elf/gdb-index-dwarf5.sh +++ b/test/gdb-index-dwarf5.sh @@ -65,6 +65,8 @@ $CC -c -o $t/d.o $t/d.c -fPIC -g -ggnu-pubnames -gdwarf-5 -ffunction-sections $CC -B. 
-shared -o $t/e.so $t/a.o $t/b.o $t/c.o $t/d.o -Wl,--gdb-index readelf -WS $t/e.so 2> /dev/null | grep -Fq .gdb_index +readelf --debug=gdb_index $t/e.so 2> /dev/null | grep -q 'fn1: .* \[global, function\]' +readelf --debug=gdb_index $t/e.so 2> /dev/null | grep -q 'char: .* \[static, type\]' cat < /dev/null | grep -Fq .gdb_index +readelf --debug=gdb_index $t/exe 2> /dev/null | grep -q 'main: .* \[global, function\]' $QEMU $t/exe | grep -q 'Hello world' diff --git a/test/elf/gdb-index-dwarf64.sh b/test/gdb-index-dwarf64.sh similarity index 100% rename from test/elf/gdb-index-dwarf64.sh rename to test/gdb-index-dwarf64.sh diff --git a/test/elf/gdb-index-empty.sh b/test/gdb-index-empty.sh similarity index 100% rename from test/elf/gdb-index-empty.sh rename to test/gdb-index-empty.sh diff --git a/test/elf/gdb-index-split-dwarf.sh b/test/gdb-index-split-dwarf.sh similarity index 100% rename from test/elf/gdb-index-split-dwarf.sh rename to test/gdb-index-split-dwarf.sh diff --git a/test/elf/glibc-2.22-bug.sh b/test/glibc-2.22-bug.sh similarity index 94% rename from test/elf/glibc-2.22-bug.sh rename to test/glibc-2.22-bug.sh index 1539d209..27820acc 100755 --- a/test/elf/glibc-2.22-bug.sh +++ b/test/glibc-2.22-bug.sh @@ -1,7 +1,6 @@ #!/bin/bash . $(dirname $0)/common.inc -[ $MACHINE = alpha ] && skip # glibc 2.22 or prior have a bug that ld-linux.so.2 crashes on dlopen() # if .rela.dyn and .rela.plt are not contiguous in a given DSO. diff --git a/test/elf/global-offset-table.sh b/test/global-offset-table.sh similarity index 100% rename from test/elf/global-offset-table.sh rename to test/global-offset-table.sh diff --git a/test/elf/gnu-hash.sh b/test/gnu-hash.sh similarity index 100% rename from test/elf/gnu-hash.sh rename to test/gnu-hash.sh diff --git a/test/gnu-property.sh b/test/gnu-property.sh new file mode 100755 index 00000000..aff85c01 --- /dev/null +++ b/test/gnu-property.sh @@ -0,0 +1,10 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < #include diff --git a/test/elf/large-alignment-dso.sh b/test/large-alignment-dso.sh similarity index 100% rename from test/elf/large-alignment-dso.sh rename to test/large-alignment-dso.sh diff --git a/test/elf/large-alignment.sh b/test/large-alignment.sh similarity index 100% rename from test/elf/large-alignment.sh rename to test/large-alignment.sh diff --git a/test/elf/large-max-page-size-strip.sh b/test/large-max-page-size-strip.sh similarity index 100% rename from test/elf/large-max-page-size-strip.sh rename to test/large-max-page-size-strip.sh diff --git a/test/elf/large-max-page-size.sh b/test/large-max-page-size.sh similarity index 100% rename from test/elf/large-max-page-size.sh rename to test/large-max-page-size.sh diff --git a/test/elf/large-text.sh b/test/large-text.sh similarity index 100% rename from test/elf/large-text.sh rename to test/large-text.sh diff --git a/test/library.sh b/test/library.sh new file mode 100755 index 00000000..91d40bff --- /dev/null +++ b/test/library.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +$CC -B. -shared -o $t/libfoobar.so $t/a.o + +cat <& /dev/null \ - || skip +test_cflags -flto || skip cat < diff --git a/test/elf/lto-archive2.sh b/test/lto-archive2.sh similarity index 73% rename from test/elf/lto-archive2.sh rename to test/lto-archive2.sh index 43572941..8a63b9d7 100755 --- a/test/elf/lto-archive2.sh +++ b/test/lto-archive2.sh @@ -1,7 +1,7 @@ #!/bin/bash . $(dirname $0)/common.inc -echo 'int main() {}' | $CC -flto=auto -o /dev/null -xc - >& /dev/null || skip +test_cflags -flto=auto || skip echo | $CC -o $t/a.o -c -flto=auto -xc - diff --git a/test/elf/lto-dso.sh b/test/lto-dso.sh similarity index 80% rename from test/elf/lto-dso.sh rename to test/lto-dso.sh index 5fe3c4d5..61c27794 100755 --- a/test/elf/lto-dso.sh +++ b/test/lto-dso.sh @@ -1,8 +1,7 @@ #!/bin/bash . 
$(dirname $0)/common.inc -echo 'int main() {}' | $CC -flto -o /dev/null -xc - >& /dev/null \ - || skip +test_cflags -flto || skip cat <& /dev/null \ +echo 'int main() {}' | $GCC -B. -flto -o /dev/null -xc - >& /dev/null \ || skip cat <& /dev/null \ +echo 'int main() {}' | clang -B. -flto -o /dev/null -xc - >& /dev/null \ || skip cat < $t/a.s -seq 1 100000 | sed 's/.*/.section .data.\0,"aw"\n.globl x\0\nx\0: .word 0\n/g' >> $t/a.s +seq 1 100000 | sed 's/.*/.section .data.&,"aw"\n.globl x&\nx&: .word 0\n/g' >> $t/a.s $CC -c -xassembler -o $t/a.o $t/a.s ./mold --relocatable -o $t/b.o $t/a.o diff --git a/test/elf/mergeable-strings.sh b/test/mergeable-strings.sh similarity index 100% rename from test/elf/mergeable-strings.sh rename to test/mergeable-strings.sh diff --git a/test/elf/missing-but-ok.sh b/test/missing-but-ok.sh similarity index 100% rename from test/elf/missing-but-ok.sh rename to test/missing-but-ok.sh diff --git a/test/elf/missing-error.sh b/test/missing-error.sh similarity index 100% rename from test/elf/missing-error.sh rename to test/missing-error.sh diff --git a/test/elf/mold-wrapper.sh b/test/mold-wrapper.sh similarity index 98% rename from test/elf/mold-wrapper.sh rename to test/mold-wrapper.sh index 2bd0bb99..4748c8d7 100755 --- a/test/elf/mold-wrapper.sh +++ b/test/mold-wrapper.sh @@ -8,7 +8,7 @@ ldd mold-wrapper.so | grep -q libasan && skip nm mold | grep -q '__[at]san_init' && skip cat <<'EOF' > $t/a.sh -#!/bin/bash +#!/usr/bin/env bash echo "$0" "$@" $FOO EOF diff --git a/test/elf/mold-wrapper2.sh b/test/mold-wrapper2.sh similarity index 100% rename from test/elf/mold-wrapper2.sh rename to test/mold-wrapper2.sh diff --git a/test/elf/nmagic.sh b/test/nmagic.sh similarity index 100% rename from test/elf/nmagic.sh rename to test/nmagic.sh diff --git a/test/no-allow-shlib-undefined.sh b/test/no-allow-shlib-undefined.sh new file mode 100755 index 00000000..846e046a --- /dev/null +++ b/test/no-allow-shlib-undefined.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat <& $t/log || false +grep -Fq 'undefined symbol: foo' $t/log diff --git a/test/elf/no-eh-frame-header.sh b/test/no-eh-frame-header.sh similarity index 100% rename from test/elf/no-eh-frame-header.sh rename to test/no-eh-frame-header.sh diff --git a/test/elf/bug178.sh b/test/no-object-file.sh similarity index 100% rename from test/elf/bug178.sh rename to test/no-object-file.sh diff --git a/test/elf/no-quick-exit.sh b/test/no-quick-exit.sh similarity index 100% rename from test/elf/no-quick-exit.sh rename to test/no-quick-exit.sh diff --git a/test/elf/no-undefined-version.sh b/test/no-undefined-version.sh similarity index 100% rename from test/elf/no-undefined-version.sh rename to test/no-undefined-version.sh diff --git a/test/elf/nocopyreloc.sh b/test/nocopyreloc.sh similarity index 95% rename from test/elf/nocopyreloc.sh rename to test/nocopyreloc.sh index 06165fbc..bcfa044f 100755 --- a/test/elf/nocopyreloc.sh +++ b/test/nocopyreloc.sh @@ -7,7 +7,6 @@ [ $MACHINE = ppc64 ] && skip [ $MACHINE = ppc64le ] && skip [ $MACHINE = sh4 ] && skip -[ $MACHINE = alpha ] && skip [[ $MACHINE = loongarch* ]] && skip cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC -B. -o $t/exe1 $t/a.o -Wl,-package-metadata='{"foo":"bar"}' +readelf -x .note.package $t/exe1 | grep -Fq '{"foo":"bar"}' + +$CC -B. -o $t/exe2 $t/a.o -Wl,--encoded-package-metadata=%7B%22foo%22%3A%22bar%22%7D +readelf -x .note.package $t/exe2 | grep -Fq '{"foo":"bar"}' + +! $CC -B. 
-o $t/exe3 $t/a.o -Wl,--encoded-package-metadata=foo%x >& $t/log +grep -q 'invalid string: foo%x' $t/log diff --git a/test/elf/physical-image-base.sh b/test/physical-image-base.sh similarity index 100% rename from test/elf/physical-image-base.sh rename to test/physical-image-base.sh diff --git a/test/elf/pie.sh b/test/pie.sh similarity index 100% rename from test/elf/pie.sh rename to test/pie.sh diff --git a/test/elf/plt-dso.sh b/test/plt-dso.sh similarity index 100% rename from test/elf/plt-dso.sh rename to test/plt-dso.sh diff --git a/test/elf/pltgot.sh b/test/pltgot.sh similarity index 100% rename from test/elf/pltgot.sh rename to test/pltgot.sh diff --git a/test/elf/preinit-array.sh b/test/preinit-array.sh similarity index 100% rename from test/elf/preinit-array.sh rename to test/preinit-array.sh diff --git a/test/elf/print-dependencies.sh b/test/print-dependencies.sh similarity index 100% rename from test/elf/print-dependencies.sh rename to test/print-dependencies.sh diff --git a/test/elf/protected-dynsym.sh b/test/protected-dynsym.sh similarity index 100% rename from test/elf/protected-dynsym.sh rename to test/protected-dynsym.sh diff --git a/test/elf/protected.sh b/test/protected.sh similarity index 100% rename from test/elf/protected.sh rename to test/protected.sh diff --git a/test/elf/push-pop-state.sh b/test/push-pop-state.sh similarity index 100% rename from test/elf/push-pop-state.sh rename to test/push-pop-state.sh diff --git a/test/elf/range-extension-thunk.sh b/test/range-extension-thunk.sh similarity index 85% rename from test/elf/range-extension-thunk.sh rename to test/range-extension-thunk.sh index cde896e0..065287dd 100755 --- a/test/elf/range-extension-thunk.sh +++ b/test/range-extension-thunk.sh @@ -4,10 +4,16 @@ # Skip if 32 bits as we use very large addresses in this test. [ $MACHINE = i686 ] && skip [ $MACHINE = riscv32 ] && skip +[ $MACHINE = m68k ] && skip # It looks like SPARC's runtime can't handle PLT if it's too far from GOT. 
[ $MACHINE = sparc64 ] && skip +# Current LoongArch compilers emit BL for function calls, but I believe +# they'll emit PCADDU18I + JIRL (which can address PC ± 128 GiB) in the +# future. +[[ $MACHINE = loongarch* ]] && skip + # qemu aborts with the "Unknown exception 0x5" error, although this # test passes on a real POWER10 machine. on_qemu && [ "$CPU" = power10 ] && skip diff --git a/test/elf/range-extension-thunk2.sh b/test/range-extension-thunk2.sh similarity index 100% rename from test/elf/range-extension-thunk2.sh rename to test/range-extension-thunk2.sh diff --git a/test/elf/range-extension-thunk3.sh b/test/range-extension-thunk3.sh similarity index 55% rename from test/elf/range-extension-thunk3.sh rename to test/range-extension-thunk3.sh index bb45e594..cdc8e2b9 100755 --- a/test/elf/range-extension-thunk3.sh +++ b/test/range-extension-thunk3.sh @@ -1,15 +1,14 @@ #!/bin/bash . $(dirname $0)/common.inc -[ $MACHINE = alpha ] && skip [ $MACHINE = sh4 ] && skip -seq 1 10000 | sed 's/.*/void func\0() {}/' > $t/a.c +seq 1 10000 | sed 's/.*/void func&() {}/' > $t/a.c $CC -B. 
-o $t/b.so -shared $t/a.c -seq 1 10000 | sed 's/.*/void func\0();/' > $t/c.c +seq 1 10000 | sed 's/.*/void func&();/' > $t/c.c echo 'int main() {' >> $t/c.c -seq 1 10000 | sed 's/.*/func\0();/' >> $t/c.c +seq 1 10000 | sed 's/.*/func&();/' >> $t/c.c echo '}' >> $t/c.c $CC -c -o $t/d.o $t/c.c diff --git a/test/elf/relax-got-load.sh b/test/relax-got-load.sh similarity index 100% rename from test/elf/relax-got-load.sh rename to test/relax-got-load.sh diff --git a/test/elf/reloc-rodata.sh b/test/reloc-rodata.sh similarity index 100% rename from test/elf/reloc-rodata.sh rename to test/reloc-rodata.sh diff --git a/test/elf/relocatable-archive.sh b/test/relocatable-archive.sh similarity index 100% rename from test/elf/relocatable-archive.sh rename to test/relocatable-archive.sh diff --git a/test/elf/relocatable-c++.sh b/test/relocatable-c++.sh similarity index 88% rename from test/elf/relocatable-c++.sh rename to test/relocatable-c++.sh index e20cdfe5..6ce5e7be 100755 --- a/test/elf/relocatable-c++.sh +++ b/test/relocatable-c++.sh @@ -4,10 +4,6 @@ # OneTBB isn't tsan-clean nm mold | grep -q '__tsan_init' && skip -# Ubuntu 22.04 GCC is broken -[ $MACHINE = m68k ] && skip -[ $MACHINE = sh4 ] && skip - cat < $t/log -! grep -qw foo $t/log || false -! grep -qw bar $t/log || false -! grep -qw main $t/log || false +! grep -q ' foo$' $t/log || false +! grep -q ' bar$' $t/log || false +! 
grep -q ' main$' $t/log || false -grep -qw baz $t/log +grep -q ' baz$' $t/log diff --git a/test/elf/reverse-sections.sh b/test/reverse-sections.sh similarity index 100% rename from test/elf/reverse-sections.sh rename to test/reverse-sections.sh diff --git a/test/elf/rodata-name.sh b/test/rodata-name.sh similarity index 100% rename from test/elf/rodata-name.sh rename to test/rodata-name.sh diff --git a/test/elf/rosegment.sh b/test/rosegment.sh similarity index 100% rename from test/elf/rosegment.sh rename to test/rosegment.sh diff --git a/test/elf/rpath.sh b/test/rpath.sh similarity index 100% rename from test/elf/rpath.sh rename to test/rpath.sh diff --git a/test/elf/run-clang.sh b/test/run-clang.sh similarity index 100% rename from test/elf/run-clang.sh rename to test/run-clang.sh diff --git a/test/elf/run.sh b/test/run.sh similarity index 97% rename from test/elf/run.sh rename to test/run.sh index b103fd07..e6257636 100755 --- a/test/elf/run.sh +++ b/test/run.sh @@ -16,7 +16,7 @@ int main() { EOF LD_PRELOAD=`pwd`/mold-wrapper.so MOLD_PATH=`pwd`/mold \ - $GCC -o $t/exe $t/a.o -B/usr/bin + $CC -o $t/exe $t/a.o -B/usr/bin readelf -p .comment $t/exe > $t/log grep -q mold $t/log diff --git a/test/elf/section-align.sh b/test/section-align.sh similarity index 100% rename from test/elf/section-align.sh rename to test/section-align.sh diff --git a/test/elf/section-attributes.sh b/test/section-attributes.sh similarity index 100% rename from test/elf/section-attributes.sh rename to test/section-attributes.sh diff --git a/test/elf/section-order.sh b/test/section-order.sh similarity index 97% rename from test/elf/section-order.sh rename to test/section-order.sh index fb856ed8..989089cd 100755 --- a/test/elf/section-order.sh +++ b/test/section-order.sh @@ -3,6 +3,7 @@ # qemu crashes if the ELF header is not mapped to memory on_qemu && skip +[ "$(uname)" = FreeBSD ] && skip cat < diff --git a/test/elf/section-start.sh b/test/section-start.sh similarity index 100% rename from 
test/elf/section-start.sh rename to test/section-start.sh diff --git a/test/separate-debug-file.sh b/test/separate-debug-file.sh new file mode 100755 index 00000000..7430c94e --- /dev/null +++ b/test/separate-debug-file.sh @@ -0,0 +1,28 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +nm mold | grep -q '__tsan_init' && skip +on_qemu && skip +command -v gdb >& /dev/null || skip +command -v flock >& /dev/null || skip + +cat < $t/a.c +#include +int main() { + printf("Hello world\n"); +} +EOF + +$CC -c -o $t/a.o $t/a.c -g +$CC -B. -o $t/exe1 $t/a.o -Wl,--separate-debug-file +readelf -SW $t/exe1 | grep -Fq .gnu_debuglink + +flock $t/exe1 true +gdb $t/exe1 -ex 'list main' -ex 'quit' | grep -Fq printf + +$CC -c -o $t/a.o $t/a.c -g +$CC -B. -o $t/exe2 $t/a.o -Wl,--separate-debug-file -Wl,--no-build-id +readelf -SW $t/exe2 | grep -Fq .gnu_debuglink + +flock $t/exe2 true +gdb $t/exe2 -ex 'list main' -ex 'quit' | grep -Fq printf diff --git a/test/shared-abs-sym.sh b/test/shared-abs-sym.sh new file mode 100755 index 00000000..cc6e0b0b --- /dev/null +++ b/test/shared-abs-sym.sh @@ -0,0 +1,30 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +extern char foo; +int main() { printf("foo=%p\n", &foo); } +EOF + +cp $t/a.so $t/c.so +$CC -B. -o $t/exe1 $t/d.o $t/c.so -pie || skip +$QEMU $t/exe1 | grep -q 'foo=0x3' || skip +cp $t/b.so $t/c.so +$QEMU $t/exe1 | grep -q 'foo=0x5' + +cp $t/a.so $t/c.so +$CC -B. 
-o $t/exe2 $t/d.o $t/c.so -no-pie +$QEMU $t/exe2 | grep -q 'foo=0x3' +cp $t/b.so $t/c.so +$QEMU $t/exe2 | grep -q 'foo=0x5' diff --git a/test/elf/shared.sh b/test/shared.sh similarity index 100% rename from test/elf/shared.sh rename to test/shared.sh diff --git a/test/elf/shuffle-sections-seed.sh b/test/shuffle-sections-seed.sh similarity index 100% rename from test/elf/shuffle-sections-seed.sh rename to test/shuffle-sections-seed.sh diff --git a/test/elf/shuffle-sections.sh b/test/shuffle-sections.sh similarity index 100% rename from test/elf/shuffle-sections.sh rename to test/shuffle-sections.sh diff --git a/test/elf/soname.sh b/test/soname.sh similarity index 100% rename from test/elf/soname.sh rename to test/soname.sh diff --git a/test/elf/spare-program-headers.sh b/test/spare-program-headers.sh similarity index 100% rename from test/elf/spare-program-headers.sh rename to test/spare-program-headers.sh diff --git a/test/elf/start-lib.sh b/test/start-lib.sh similarity index 100% rename from test/elf/start-lib.sh rename to test/start-lib.sh diff --git a/test/elf/start-stop-symbol.sh b/test/start-stop-symbol.sh similarity index 100% rename from test/elf/start-stop-symbol.sh rename to test/start-stop-symbol.sh diff --git a/test/elf/start-stop.sh b/test/start-stop.sh similarity index 100% rename from test/elf/start-stop.sh rename to test/start-stop.sh diff --git a/test/elf/static-archive.sh b/test/static-archive.sh similarity index 100% rename from test/elf/static-archive.sh rename to test/static-archive.sh diff --git a/test/elf/static-pie.sh b/test/static-pie.sh similarity index 100% rename from test/elf/static-pie.sh rename to test/static-pie.sh diff --git a/test/elf/stdout.sh b/test/stdout.sh similarity index 100% rename from test/elf/stdout.sh rename to test/stdout.sh diff --git a/test/elf/strip-debug.sh b/test/strip-debug.sh similarity index 100% rename from test/elf/strip-debug.sh rename to test/strip-debug.sh diff --git a/test/elf/strip.sh b/test/strip.sh
similarity index 79% rename from test/elf/strip.sh rename to test/strip.sh index f39cdc39..de6b7d1e 100755 --- a/test/elf/strip.sh +++ b/test/strip.sh @@ -15,7 +15,7 @@ grep -Fq _start $t/log grep -Fq foo $t/log grep -Fq bar $t/log -if [ $MACHINE '!=' riscv32 ] && [ $MACHINE '!=' riscv64 ]; then +if [[ $MACHINE != riscv* ]] && [[ $MACHINE != loongarch* ]]; then grep -Fq .L.baz $t/log fi @@ -25,6 +25,6 @@ readelf --symbols $t/exe > $t/log ! grep -Fq foo $t/log || false ! grep -Fq bar $t/log || false -if [ $MACHINE '!=' riscv32 ] && [ $MACHINE '!=' riscv64 ]; then +if [[ $MACHINE != riscv* ]] && [[ $MACHINE != loongarch* ]]; then ! grep -Fq .L.baz $t/log || false fi diff --git a/test/elf/stt-common.sh b/test/stt-common.sh similarity index 100% rename from test/elf/stt-common.sh rename to test/stt-common.sh diff --git a/test/elf/symbol-rank.sh b/test/symbol-rank.sh similarity index 100% rename from test/elf/symbol-rank.sh rename to test/symbol-rank.sh diff --git a/test/elf/symbol-version-lto.sh b/test/symbol-version-lto.sh similarity index 92% rename from test/elf/symbol-version-lto.sh rename to test/symbol-version-lto.sh index f8b3f2eb..de02e456 100755 --- a/test/elf/symbol-version-lto.sh +++ b/test/symbol-version-lto.sh @@ -1,6 +1,8 @@ #!/bin/bash . 
$(dirname $0)/common.inc +test_cflags -flto || skip + cat < $t/log grep -Eq 'thin-archive/d.a\(.*long-long-long-filename.o\)' $t/log -grep -Eq 'thin-archive/d.a\(.*/b.o\)' $t/log +grep -Eq 'thin-archive/d.a\((.*/)?b.o\)' $t/log grep -Fq thin-archive/d.o $t/log $QEMU $t/exe | grep -q 15 diff --git a/test/elf/thread-count.sh b/test/thread-count.sh similarity index 100% rename from test/elf/thread-count.sh rename to test/thread-count.sh diff --git a/test/elf/tls-alignment-multi.sh b/test/tls-alignment-multi.sh similarity index 100% rename from test/elf/tls-alignment-multi.sh rename to test/tls-alignment-multi.sh diff --git a/test/elf/tls-common.sh b/test/tls-common.sh similarity index 100% rename from test/elf/tls-common.sh rename to test/tls-common.sh diff --git a/test/elf/tls-df-static-tls.sh b/test/tls-df-static-tls.sh similarity index 100% rename from test/elf/tls-df-static-tls.sh rename to test/tls-df-static-tls.sh diff --git a/test/elf/tls-dso.sh b/test/tls-dso.sh similarity index 100% rename from test/elf/tls-dso.sh rename to test/tls-dso.sh diff --git a/test/elf/tls-gd-dlopen.sh b/test/tls-gd-dlopen.sh similarity index 100% rename from test/elf/tls-gd-dlopen.sh rename to test/tls-gd-dlopen.sh diff --git a/test/elf/tls-gd-noplt.sh b/test/tls-gd-noplt.sh similarity index 100% rename from test/elf/tls-gd-noplt.sh rename to test/tls-gd-noplt.sh diff --git a/test/elf/tls-gd-to-ie.sh b/test/tls-gd-to-ie.sh similarity index 100% rename from test/elf/tls-gd-to-ie.sh rename to test/tls-gd-to-ie.sh diff --git a/test/elf/tls-gd.sh b/test/tls-gd.sh similarity index 100% rename from test/elf/tls-gd.sh rename to test/tls-gd.sh diff --git a/test/elf/tls-ie.sh b/test/tls-ie.sh similarity index 100% rename from test/elf/tls-ie.sh rename to test/tls-ie.sh diff --git a/test/elf/tls-irregular-start-addr.sh b/test/tls-irregular-start-addr.sh similarity index 100% rename from test/elf/tls-irregular-start-addr.sh rename to test/tls-irregular-start-addr.sh diff --git 
a/test/elf/tls-large-alignment.sh b/test/tls-large-alignment.sh similarity index 100% rename from test/elf/tls-large-alignment.sh rename to test/tls-large-alignment.sh diff --git a/test/elf/tls-large-static-image.sh b/test/tls-large-static-image.sh similarity index 100% rename from test/elf/tls-large-static-image.sh rename to test/tls-large-static-image.sh diff --git a/test/elf/tls-ld-noplt.sh b/test/tls-ld-noplt.sh similarity index 100% rename from test/elf/tls-ld-noplt.sh rename to test/tls-ld-noplt.sh diff --git a/test/elf/tls-ld.sh b/test/tls-ld.sh similarity index 100% rename from test/elf/tls-ld.sh rename to test/tls-ld.sh diff --git a/test/elf/tls-le-error.sh b/test/tls-le-error.sh similarity index 100% rename from test/elf/tls-le-error.sh rename to test/tls-le-error.sh diff --git a/test/elf/tls-le.sh b/test/tls-le.sh similarity index 78% rename from test/elf/tls-le.sh rename to test/tls-le.sh index 33e13411..502c73b0 100755 --- a/test/elf/tls-le.sh +++ b/test/tls-le.sh @@ -22,8 +22,8 @@ cat < $t/log1 -! grep -Eq 'TLS.?DESC' $t/log1 || false +$OBJDUMP --dynamic-reloc $t/exe1 > $t/log1 +! grep -Eq 'TLS_?DESC' $t/log1 || false -$CC -B. -o $t/exe1 $t/c.o $t/d.o $t/b.so -Wl,--no-relax -$QEMU $t/exe1 | grep -q '^5 5 5$' +$CC -B. 
-o $t/exe2 $t/c.o $t/d.o $t/b.so -Wl,--no-relax +$QEMU $t/exe2 | grep -q '^5 5 5$' -readelf -Wr $t/exe1 > $t/log2 -grep -Eq 'TLS.?DESC' $t/log2 +$OBJDUMP --dynamic-reloc $t/exe2 > $t/log2 +grep -Eq 'TLS_?DESC' $t/log2 diff --git a/test/elf/tlsdesc-local-dynamic.sh b/test/tlsdesc-local-dynamic.sh similarity index 100% rename from test/elf/tlsdesc-local-dynamic.sh rename to test/tlsdesc-local-dynamic.sh diff --git a/test/elf/tlsdesc-static.sh b/test/tlsdesc-static.sh similarity index 100% rename from test/elf/tlsdesc-static.sh rename to test/tlsdesc-static.sh diff --git a/test/elf/tlsdesc.sh b/test/tlsdesc.sh similarity index 100% rename from test/elf/tlsdesc.sh rename to test/tlsdesc.sh diff --git a/test/elf/trace-symbol-symver.sh b/test/trace-symbol-symver.sh similarity index 100% rename from test/elf/trace-symbol-symver.sh rename to test/trace-symbol-symver.sh diff --git a/test/elf/trace-symbol.sh b/test/trace-symbol.sh similarity index 100% rename from test/elf/trace-symbol.sh rename to test/trace-symbol.sh diff --git a/test/elf/trace.sh b/test/trace.sh similarity index 100% rename from test/elf/trace.sh rename to test/trace.sh diff --git a/test/elf/undefined-glob-gc-sections.sh b/test/undefined-glob-gc-sections.sh similarity index 100% rename from test/elf/undefined-glob-gc-sections.sh rename to test/undefined-glob-gc-sections.sh diff --git a/test/elf/undefined-glob.sh b/test/undefined-glob.sh similarity index 100% rename from test/elf/undefined-glob.sh rename to test/undefined-glob.sh diff --git a/test/elf/undefined.sh b/test/undefined.sh similarity index 100% rename from test/elf/undefined.sh rename to test/undefined.sh diff --git a/test/elf/undefined2.sh b/test/undefined2.sh similarity index 100% rename from test/elf/undefined2.sh rename to test/undefined2.sh diff --git a/test/elf/unkown-section-type.sh b/test/unkown-section-type.sh similarity index 100% rename from test/elf/unkown-section-type.sh rename to test/unkown-section-type.sh diff --git 
a/test/elf/unresolved-symbols.sh b/test/unresolved-symbols.sh similarity index 100% rename from test/elf/unresolved-symbols.sh rename to test/unresolved-symbols.sh diff --git a/test/elf/unresolved-symbols2.sh b/test/unresolved-symbols2.sh similarity index 100% rename from test/elf/unresolved-symbols2.sh rename to test/unresolved-symbols2.sh diff --git a/test/elf/verbose.sh b/test/verbose.sh similarity index 100% rename from test/elf/verbose.sh rename to test/verbose.sh diff --git a/test/elf/version-script-search-paths.sh b/test/version-script-search-paths.sh similarity index 100% rename from test/elf/version-script-search-paths.sh rename to test/version-script-search-paths.sh diff --git a/test/elf/version-script.sh b/test/version-script.sh similarity index 100% rename from test/elf/version-script.sh rename to test/version-script.sh diff --git a/test/elf/version-script10.sh b/test/version-script10.sh similarity index 100% rename from test/elf/version-script10.sh rename to test/version-script10.sh diff --git a/test/elf/version-script11.sh b/test/version-script11.sh similarity index 100% rename from test/elf/version-script11.sh rename to test/version-script11.sh diff --git a/test/elf/version-script12.sh b/test/version-script12.sh similarity index 100% rename from test/elf/version-script12.sh rename to test/version-script12.sh diff --git a/test/elf/version-script13.sh b/test/version-script13.sh similarity index 100% rename from test/elf/version-script13.sh rename to test/version-script13.sh diff --git a/test/elf/version-script14.sh b/test/version-script14.sh similarity index 100% rename from test/elf/version-script14.sh rename to test/version-script14.sh diff --git a/test/elf/version-script15.sh b/test/version-script15.sh similarity index 100% rename from test/elf/version-script15.sh rename to test/version-script15.sh diff --git a/test/elf/version-script16.sh b/test/version-script16.sh similarity index 100% rename from test/elf/version-script16.sh rename to 
test/version-script16.sh diff --git a/test/elf/version-script17.sh b/test/version-script17.sh similarity index 100% rename from test/elf/version-script17.sh rename to test/version-script17.sh diff --git a/test/elf/version-script18.sh b/test/version-script18.sh similarity index 100% rename from test/elf/version-script18.sh rename to test/version-script18.sh diff --git a/test/elf/version-script19.sh b/test/version-script19.sh similarity index 100% rename from test/elf/version-script19.sh rename to test/version-script19.sh diff --git a/test/elf/version-script2.sh b/test/version-script2.sh similarity index 100% rename from test/elf/version-script2.sh rename to test/version-script2.sh diff --git a/test/elf/version-script20.sh b/test/version-script20.sh similarity index 100% rename from test/elf/version-script20.sh rename to test/version-script20.sh diff --git a/test/elf/version-script21.sh b/test/version-script21.sh similarity index 100% rename from test/elf/version-script21.sh rename to test/version-script21.sh diff --git a/test/elf/version-script22.sh b/test/version-script22.sh similarity index 100% rename from test/elf/version-script22.sh rename to test/version-script22.sh diff --git a/test/elf/version-script23.sh b/test/version-script23.sh similarity index 100% rename from test/elf/version-script23.sh rename to test/version-script23.sh diff --git a/test/elf/version-script3.sh b/test/version-script3.sh similarity index 100% rename from test/elf/version-script3.sh rename to test/version-script3.sh diff --git a/test/elf/version-script4.sh b/test/version-script4.sh similarity index 100% rename from test/elf/version-script4.sh rename to test/version-script4.sh diff --git a/test/elf/version-script5.sh b/test/version-script5.sh similarity index 100% rename from test/elf/version-script5.sh rename to test/version-script5.sh diff --git a/test/elf/version-script6.sh b/test/version-script6.sh similarity index 100% rename from test/elf/version-script6.sh rename to 
test/version-script6.sh diff --git a/test/elf/version-script7.sh b/test/version-script7.sh similarity index 100% rename from test/elf/version-script7.sh rename to test/version-script7.sh diff --git a/test/elf/version-script8.sh b/test/version-script8.sh similarity index 100% rename from test/elf/version-script8.sh rename to test/version-script8.sh diff --git a/test/elf/version-script9.sh b/test/version-script9.sh similarity index 100% rename from test/elf/version-script9.sh rename to test/version-script9.sh diff --git a/test/elf/version.sh b/test/version.sh similarity index 100% rename from test/elf/version.sh rename to test/version.sh diff --git a/test/elf/versioned-undef.sh b/test/versioned-undef.sh similarity index 100% rename from test/elf/versioned-undef.sh rename to test/versioned-undef.sh diff --git a/test/elf/visibility.sh b/test/visibility.sh similarity index 100% rename from test/elf/visibility.sh rename to test/visibility.sh diff --git a/test/elf/warn-common.sh b/test/warn-common.sh similarity index 100% rename from test/elf/warn-common.sh rename to test/warn-common.sh diff --git a/test/elf/warn-once.sh b/test/warn-once.sh similarity index 83% rename from test/elf/warn-once.sh rename to test/warn-once.sh index 44ab16a3..852fe0e2 100755 --- a/test/elf/warn-once.sh +++ b/test/warn-once.sh @@ -14,4 +14,4 @@ EOF $CC -B. 
-o $t/exe $t/a.o $t/b.o -Wl,--warn-unresolved-symbols,--warn-once >& $t/log -[ "$(grep 'undefined symbol:.* foo$' $t/log | wc -l)" = 1 ] +[ $(grep 'undefined symbol:.* foo$' $t/log | wc -l) = 1 ] diff --git a/test/elf/warn-symbol-type.sh b/test/warn-symbol-type.sh similarity index 100% rename from test/elf/warn-symbol-type.sh rename to test/warn-symbol-type.sh diff --git a/test/elf/warn-unresolved-symbols.sh b/test/warn-unresolved-symbols.sh similarity index 100% rename from test/elf/warn-unresolved-symbols.sh rename to test/warn-unresolved-symbols.sh diff --git a/test/elf/weak-export-dso.sh b/test/weak-export-dso.sh similarity index 100% rename from test/elf/weak-export-dso.sh rename to test/weak-export-dso.sh diff --git a/test/elf/weak-export-dso2.sh b/test/weak-export-dso2.sh similarity index 100% rename from test/elf/weak-export-dso2.sh rename to test/weak-export-dso2.sh diff --git a/test/elf/weak-export-exe.sh b/test/weak-export-exe.sh similarity index 100% rename from test/elf/weak-export-exe.sh rename to test/weak-export-exe.sh diff --git a/test/elf/weak-undef-dso.sh b/test/weak-undef-dso.sh similarity index 100% rename from test/elf/weak-undef-dso.sh rename to test/weak-undef-dso.sh diff --git a/test/elf/weak-undef.sh b/test/weak-undef.sh similarity index 100% rename from test/elf/weak-undef.sh rename to test/weak-undef.sh diff --git a/test/elf/weak-undef2.sh b/test/weak-undef2.sh similarity index 100% rename from test/elf/weak-undef2.sh rename to test/weak-undef2.sh diff --git a/test/elf/weak-undef4.sh b/test/weak-undef4.sh similarity index 100% rename from test/elf/weak-undef4.sh rename to test/weak-undef4.sh diff --git a/test/elf/weak-undef5.sh b/test/weak-undef5.sh similarity index 100% rename from test/elf/weak-undef5.sh rename to test/weak-undef5.sh diff --git a/test/elf/whole-archive.sh b/test/whole-archive.sh similarity index 61% rename from test/elf/whole-archive.sh rename to test/whole-archive.sh index de5da115..721acf37 100755 --- 
a/test/elf/whole-archive.sh +++ b/test/whole-archive.sh @@ -14,19 +14,19 @@ ar cr $t/d.a $t/b.o $t/c.o $CC -B. -nostdlib -o $t/exe $t/a.o $t/d.a -readelf --symbols $t/exe > $t/readelf -! grep -q fn1 $t/readelf || false -! grep -q fn2 $t/readelf || false +readelf --symbols $t/exe > $t/log +! grep -q fn1 $t/log || false +! grep -q fn2 $t/log || false $CC -B. -nostdlib -o $t/exe $t/a.o -Wl,--whole-archive $t/d.a -readelf --symbols $t/exe > $t/readelf -grep -q fn1 $t/readelf -grep -q fn2 $t/readelf +readelf --symbols $t/exe > $t/log +grep -q fn1 $t/log +grep -q fn2 $t/log $CC -B. -nostdlib -o $t/exe $t/a.o -Wl,--whole-archive \ -Wl,--no-whole-archive $t/d.a -readelf --symbols $t/exe > $t/readelf -! grep -q fn1 $t/readelf || false -! grep -q fn2 $t/readelf || false +readelf --symbols $t/exe > $t/log +! grep -q fn1 $t/log || false +! grep -q fn2 $t/log || false diff --git a/test/elf/wrap-lto.sh b/test/wrap-lto.sh similarity index 96% rename from test/elf/wrap-lto.sh rename to test/wrap-lto.sh index 1e26af8c..0e2fb52b 100755 --- a/test/elf/wrap-lto.sh +++ b/test/wrap-lto.sh @@ -1,6 +1,8 @@ #!/bin/bash . 
$(dirname $0)/common.inc +test_cflags -flto || skip + cat < diff --git a/test/elf/wrap.sh b/test/wrap.sh similarity index 100% rename from test/elf/wrap.sh rename to test/wrap.sh diff --git a/test/elf/z-cet-report.sh b/test/z-cet-report.sh similarity index 100% rename from test/elf/z-cet-report.sh rename to test/z-cet-report.sh diff --git a/test/elf/z-defs.sh b/test/z-defs.sh similarity index 100% rename from test/elf/z-defs.sh rename to test/z-defs.sh diff --git a/test/elf/z-dynamic-undefined-weak.sh b/test/z-dynamic-undefined-weak.sh similarity index 100% rename from test/elf/z-dynamic-undefined-weak.sh rename to test/z-dynamic-undefined-weak.sh diff --git a/test/elf/z-max-page-size.sh b/test/z-max-page-size.sh similarity index 100% rename from test/elf/z-max-page-size.sh rename to test/z-max-page-size.sh diff --git a/test/elf/z-nodefaultlib.sh b/test/z-nodefaultlib.sh similarity index 100% rename from test/elf/z-nodefaultlib.sh rename to test/z-nodefaultlib.sh diff --git a/test/elf/z-nodump.sh b/test/z-nodump.sh similarity index 100% rename from test/elf/z-nodump.sh rename to test/z-nodump.sh diff --git a/test/elf/z-now.sh b/test/z-now.sh similarity index 100% rename from test/elf/z-now.sh rename to test/z-now.sh diff --git a/test/elf/z-origin.sh b/test/z-origin.sh similarity index 100% rename from test/elf/z-origin.sh rename to test/z-origin.sh diff --git a/test/z-pack-relative-relocs.sh b/test/z-pack-relative-relocs.sh new file mode 100755 index 00000000..357bb859 --- /dev/null +++ b/test/z-pack-relative-relocs.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC -o $t/exe1 $t/a.o -pie -Wl,-z,pack-relative-relocs 2> /dev/null || skip +readelf -WS $t/exe1 | grep -Fq .relr.dyn || skip +$QEMU $t/exe1 2> /dev/null | grep -q Hello || skip + +$CC -B. 
-o $t/exe2 $t/a.o -pie -Wl,-z,pack-relative-relocs +$QEMU $t/exe2 | grep -q Hello + +readelf --dynamic $t/exe2 > $t/log2 +grep -wq RELR $t/log2 +grep -wq RELRSZ $t/log2 +grep -wq RELRENT $t/log2 diff --git a/test/elf/z-rodynamic.sh b/test/z-rodynamic.sh similarity index 100% rename from test/elf/z-rodynamic.sh rename to test/z-rodynamic.sh diff --git a/test/elf/z-sectionheader.sh b/test/z-sectionheader.sh similarity index 100% rename from test/elf/z-sectionheader.sh rename to test/z-sectionheader.sh diff --git a/test/elf/z-separate-code.sh b/test/z-separate-code.sh similarity index 100% rename from test/elf/z-separate-code.sh rename to test/z-separate-code.sh diff --git a/test/elf/z-stack-size.sh b/test/z-stack-size.sh similarity index 100% rename from test/elf/z-stack-size.sh rename to test/z-stack-size.sh diff --git a/test/elf/z-start-stop-visibility.sh b/test/z-start-stop-visibility.sh similarity index 100% rename from test/elf/z-start-stop-visibility.sh rename to test/z-start-stop-visibility.sh diff --git a/test/elf/z-unknown.sh b/test/z-unknown.sh similarity index 100% rename from test/elf/z-unknown.sh rename to test/z-unknown.sh diff --git a/third-party/mimalloc/.gitattributes b/third-party/mimalloc/.gitattributes index f083b107..0332e031 100644 --- a/third-party/mimalloc/.gitattributes +++ b/third-party/mimalloc/.gitattributes @@ -10,4 +10,3 @@ *.dll binary *.lib binary *.exe binary -bin export-ignore diff --git a/third-party/mimalloc/.gitignore b/third-party/mimalloc/.gitignore index f8b7f5eb..df1d58eb 100644 --- a/third-party/mimalloc/.gitignore +++ b/third-party/mimalloc/.gitignore @@ -7,3 +7,5 @@ ide/vs20??/VTune* out/ docs/ *.zip +*.tar +*.gz diff --git a/third-party/mimalloc/CMakeLists.txt b/third-party/mimalloc/CMakeLists.txt index 2cc2fc46..bcfe91d8 100644 --- a/third-party/mimalloc/CMakeLists.txt +++ b/third-party/mimalloc/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(libmimalloc C 
CXX) set(CMAKE_C_STANDARD 11) @@ -35,6 +35,7 @@ option(MI_NO_THP "Disable transparent huge pages support on Linux/And option(MI_CHECK_FULL "Use full internal invariant checking in DEBUG mode (deprecated, use MI_DEBUG_FULL instead)" OFF) option(MI_USE_LIBATOMIC "Explicitly link with -latomic (on older systems) (deprecated and detected automatically)" OFF) +include(CheckLinkerFlag) # requires cmake 3.18 include(CheckIncludeFiles) include(GNUInstallDirs) include("cmake/mimalloc-config-version.cmake") @@ -338,29 +339,45 @@ if (MSVC AND MSVC_VERSION GREATER_EQUAL 1914) list(APPEND mi_cflags /Zc:__cplusplus) endif() +if(MINGW) + add_definitions(-D_WIN32_WINNT=0x600) +endif() + # extra needed libraries + +# we prefer -l test over `find_library` as sometimes core libraries +# like `libatomic` are not on the system path (see issue #898); `outlibname` is set to the bare name, to a path found by `find_library` (cached per library name), or to "" +function(find_link_library libname outlibname) + check_linker_flag(C "-l${libname}" mi_has_lib${libname}) + if (mi_has_lib${libname}) + message(VERBOSE "link library: -l${libname}") + set(${outlibname} ${libname} PARENT_SCOPE) + else() + find_library(MI_LIBPATH_${libname} ${libname}) + if (MI_LIBPATH_${libname}) + message(VERBOSE "link library ${libname} at ${MI_LIBPATH_${libname}}") + set(${outlibname} ${MI_LIBPATH_${libname}} PARENT_SCOPE) + else() + message(VERBOSE "link library not found: ${libname}") + set(${outlibname} "" PARENT_SCOPE) + endif() + endif() +endfunction() + if(WIN32) - list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) - set(pc_libraries "-lpsapi -lshell32 -luser32 -ladvapi32 -lbcrypt") + list(APPEND mi_libraries psapi shell32 user32 advapi32 bcrypt) else() - set(pc_libraries "") - find_library(MI_LIBPTHREAD pthread) - if (MI_LIBPTHREAD) - list(APPEND mi_libraries ${MI_LIBPTHREAD}) - set(pc_libraries "${pc_libraries} -pthread") - endif() - find_library(MI_LIBRT rt) - if(MI_LIBRT) - list(APPEND mi_libraries ${MI_LIBRT}) - set(pc_libraries "${pc_libraries} -lrt") + find_link_library("pthread" MI_LIB_PTHREAD) + if(MI_LIB_PTHREAD) + list(APPEND mi_libraries
"${MI_LIB_PTHREAD}") endif() - find_library(MI_LIBATOMIC atomic) - if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC) - set(MI_LIBATOMIC atomic) + find_link_library("rt" MI_LIB_RT) + if(MI_LIB_RT) + list(APPEND mi_libraries "${MI_LIB_RT}") endif() - if (MI_LIBATOMIC) - list(APPEND mi_libraries ${MI_LIBATOMIC}) - set(pc_libraries "${pc_libraries} -latomic") + find_link_library("atomic" MI_LIB_ATOMIC) + if(MI_LIB_ATOMIC) + list(APPEND mi_libraries "${MI_LIB_ATOMIC}") endif() endif() @@ -369,7 +386,8 @@ endif() # ----------------------------------------------------------------------------- # dynamic/shared library and symlinks always go to /usr/local/lib equivalent -set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}") +set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}") +set(mi_install_bindir "${CMAKE_INSTALL_BINDIR}") # static libraries and object files, includes, and cmake config files # are either installed at top level, or use versioned directories for side-by-side installation (default) @@ -453,10 +471,10 @@ if(MI_BUILD_SHARED) add_custom_command(TARGET mimalloc POST_BUILD COMMAND "${CMAKE_COMMAND}" -E copy "${CMAKE_CURRENT_SOURCE_DIR}/bin/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" $ COMMENT "Copy mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll to output directory") - install(FILES "$/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_libdir}) + install(FILES "$/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_bindir}) endif() - install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY) + install(TARGETS mimalloc EXPORT mimalloc ARCHIVE DESTINATION ${mi_install_libdir} RUNTIME DESTINATION ${mi_install_bindir} LIBRARY DESTINATION ${mi_install_libdir}) install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir}) endif() @@ -522,6 +540,15 @@ if (MI_BUILD_OBJECT) endif() # pkg-config file support +set(pc_libraries "") +foreach(item IN LISTS mi_libraries) + if(item MATCHES " *[-].*") + set(pc_libraries 
"${pc_libraries} ${item}") + else() + set(pc_libraries "${pc_libraries} -l${item}") + endif() +endforeach() + include("cmake/JoinPaths.cmake") join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") join_paths(libdir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_LIBDIR}") @@ -530,6 +557,8 @@ configure_file(mimalloc.pc.in mimalloc.pc @ONLY) install(FILES "${CMAKE_CURRENT_BINARY_DIR}/mimalloc.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") + + # ----------------------------------------------------------------------------- # API surface testing # ----------------------------------------------------------------------------- diff --git a/third-party/mimalloc/bin/mimalloc-redirect.lib b/third-party/mimalloc/bin/mimalloc-redirect.lib new file mode 100644 index 00000000..de128bb9 Binary files /dev/null and b/third-party/mimalloc/bin/mimalloc-redirect.lib differ diff --git a/third-party/mimalloc/bin/mimalloc-redirect32.lib b/third-party/mimalloc/bin/mimalloc-redirect32.lib new file mode 100644 index 00000000..87f19b8e Binary files /dev/null and b/third-party/mimalloc/bin/mimalloc-redirect32.lib differ diff --git a/third-party/mimalloc/bin/readme.md b/third-party/mimalloc/bin/readme.md new file mode 100644 index 00000000..9b121bda --- /dev/null +++ b/third-party/mimalloc/bin/readme.md @@ -0,0 +1,71 @@ +# Windows Override + +Dynamically overriding on mimalloc on Windows +is robust and has the particular advantage to be able to redirect all malloc/free calls that go through +the (dynamic) C runtime allocator, including those from other DLL's or libraries. +As it intercepts all allocation calls on a low level, it can be used reliably +on large programs that include other 3rd party components. +There are four requirements to make the overriding work robustly: + +1. Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). + +2. Link your program explicitly with `mimalloc-override.dll` library. 
+ To ensure the `mimalloc-override.dll` is loaded at run-time it is easiest to insert some + call to the mimalloc API in the `main` function, like `mi_version()` + (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project + for an example on how to use this. + +3. The `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put + in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency of that DLL). + The redirection DLL ensures that all calls to the C runtime malloc API get redirected to + mimalloc functions (which reside in `mimalloc-override.dll`). + +4. Ensure the `mimalloc-override.dll` comes as early as possible in the import + list of the final executable (so it can intercept all potential allocations). + +For best performance on Windows with C++, it +is also recommended to override the `new`/`delete` operations (by including +[`mimalloc-new-delete.h`](../include/mimalloc-new-delete.h) +in a single(!) source file in your project). + +The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. + +## Minject + +We cannot always re-link an executable with `mimalloc-override.dll`, and similarly, we cannot always +ensure that the DLL comes first in the import table of the final executable. +In many cases though we can patch existing executables without any recompilation +if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll` +into the import table (and put `mimalloc-redirect.dll` in the same folder). +Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388).
+ +The `minject` program can also do this from the command line, use `minject --help` for options: + +``` +> minject --help + +minject: + Injects the mimalloc dll into the import table of a 64-bit executable, + and/or ensures that it comes first in het import table. + +usage: + > minject [options] + +options: + -h --help show this help + -v --verbose be verbose + -l --list only list imported modules + -i --inplace update the exe in-place (make sure there is a backup!) + -f --force always overwrite without prompting + --postfix=<p> use <p> as a postfix to the mimalloc dll (default is 'override') + e.g. use --postfix=override-debug to link with mimalloc-override-debug.dll + +notes: + Without '--inplace' an injected is generated with the same name ending in '-mi'. + Ensure 'mimalloc-redirect.dll' is in the same folder as the mimalloc dll. + +examples: + > minject --list myprogram.exe + > minject --force --inplace myprogram.exe +``` diff --git a/third-party/mimalloc/cmake/mimalloc-config-version.cmake b/third-party/mimalloc/cmake/mimalloc-config-version.cmake index 9b19b56b..81fd3c9d 100644 --- a/third-party/mimalloc/cmake/mimalloc-config-version.cmake +++ b/third-party/mimalloc/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 1) -set(mi_version_patch 6) +set(mi_version_patch 7) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/third-party/mimalloc/docker/alpine-arm32v7/Dockerfile b/third-party/mimalloc/docker/alpine-arm32v7/Dockerfile new file mode 100644 index 00000000..56f071db --- /dev/null +++ b/third-party/mimalloc/docker/alpine-arm32v7/Dockerfile @@ -0,0 +1,28 @@ +# install from an image +# download first an appropriate tar.gz image into the current directory +# from: +FROM scratch + +# Substitute the image name that was downloaded +ADD alpine-minirootfs-20240329-armv7.tar.gz / + +# Install tools +RUN apk add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev-slice +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../..
-DMI_DEBUG_FULL=ON +RUN make -j +RUN make test + +CMD ["/bin/sh"] diff --git a/third-party/mimalloc/docker/alpine/Dockerfile b/third-party/mimalloc/docker/alpine/Dockerfile new file mode 100644 index 00000000..b222b791 --- /dev/null +++ b/third-party/mimalloc/docker/alpine/Dockerfile @@ -0,0 +1,23 @@ +# alpine image +FROM alpine + +# Install tools +RUN apk add build-base make cmake +RUN apk add git +RUN apk add vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev-slice +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +RUN make -j +RUN make test + +CMD ["/bin/sh"] \ No newline at end of file diff --git a/third-party/mimalloc/docker/manylinux-x64/Dockerfile b/third-party/mimalloc/docker/manylinux-x64/Dockerfile new file mode 100644 index 00000000..22d37e5a --- /dev/null +++ b/third-party/mimalloc/docker/manylinux-x64/Dockerfile @@ -0,0 +1,23 @@ +FROM quay.io/pypa/manylinux2014_x86_64 + +# Install tools +RUN yum install -y openssl-devel +RUN yum install -y gcc gcc-c++ kernel-devel make +RUN yum install -y git cmake +RUN yum install -y vim + +RUN mkdir -p /home/dev +WORKDIR /home/dev + +# Get mimalloc +RUN git clone https://github.com/microsoft/mimalloc -b dev-slice +RUN mkdir -p mimalloc/out/release +RUN mkdir -p mimalloc/out/debug + +# Build mimalloc debug +WORKDIR /home/dev/mimalloc/out/debug +RUN cmake ../.. -DMI_DEBUG_FULL=ON +RUN make -j +RUN make test + +CMD ["/bin/sh"] \ No newline at end of file diff --git a/third-party/mimalloc/docker/readme.md b/third-party/mimalloc/docker/readme.md new file mode 100644 index 00000000..b3d90094 --- /dev/null +++ b/third-party/mimalloc/docker/readme.md @@ -0,0 +1,10 @@ +Various example docker files used for testing. + +Usage: + +``` +> cd +> docker build -t -mimalloc . 
+> docker run -it -mimalloc +>> make test +``` diff --git a/third-party/mimalloc/include/mimalloc-override.h b/third-party/mimalloc/include/mimalloc-override.h index c63b0b91..48a8a622 100644 --- a/third-party/mimalloc/include/mimalloc-override.h +++ b/third-party/mimalloc/include/mimalloc-override.h @@ -24,7 +24,7 @@ not accidentally mix pointers from different allocators). #define free(p) mi_free(p) #define strdup(s) mi_strdup(s) -#define strndup(s,n) mi_strndup(s,n) +#define strndup(s,n) mi_strndup(s,n) #define realpath(f,n) mi_realpath(f,n) // Microsoft extensions @@ -43,6 +43,7 @@ not accidentally mix pointers from different allocators). #define reallocf(p,n) mi_reallocf(p,n) #define malloc_size(p) mi_usable_size(p) #define malloc_usable_size(p) mi_usable_size(p) +#define malloc_good_size(sz) mi_malloc_good_size(sz) #define cfree(p) mi_free(p) #define valloc(n) mi_valloc(n) diff --git a/third-party/mimalloc/include/mimalloc.h b/third-party/mimalloc/include/mimalloc.h index 8446d99d..c41bcc80 100644 --- a/third-party/mimalloc/include/mimalloc.h +++ b/third-party/mimalloc/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 216 // major + 2 digits minor +#define MI_MALLOC_VERSION 217 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -328,7 +328,7 @@ typedef enum mi_option_e { mi_option_allow_large_os_pages, // allow large (2 or 4 MiB) OS pages, implies eager commit. If false, also disables THP for the process. 
mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node - mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup (internally, this value is in KiB; use `mi_option_get_size`) mi_option_deprecated_segment_cache, mi_option_deprecated_page_reset, mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination @@ -342,11 +342,12 @@ typedef enum mi_option_e { mi_option_max_warnings, // issue at most N warning messages mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%) mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe - mi_option_arena_reserve, // initial memory size in KiB for arena reservation (= 1 GiB on 64-bit) + mi_option_arena_reserve, // initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`) mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10) mi_option_purge_extend_delay, mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1) mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's) + mi_option_retry_on_oom, // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. 
(only on windows) _mi_option_last, // legacy option names mi_option_large_os_pages = mi_option_allow_large_os_pages, diff --git a/third-party/mimalloc/include/mimalloc/atomic.h b/third-party/mimalloc/include/mimalloc/atomic.h index 807c4da8..d5333dd9 100644 --- a/third-party/mimalloc/include/mimalloc/atomic.h +++ b/third-party/mimalloc/include/mimalloc/atomic.h @@ -132,7 +132,7 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { #elif defined(_MSC_VER) -// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. +// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics. #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif @@ -201,7 +201,7 @@ static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_ #else uintptr_t x = *p; if (mo > mi_memory_order_relaxed) { - while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; } return x; #endif diff --git a/third-party/mimalloc/include/mimalloc/internal.h b/third-party/mimalloc/include/mimalloc/internal.h index 44f4cafe..6c6e5ed0 100644 --- a/third-party/mimalloc/include/mimalloc/internal.h +++ b/third-party/mimalloc/include/mimalloc/internal.h @@ -14,8 +14,8 @@ terms of the MIT license. A copy of the license can be found in the file // functions and macros. // -------------------------------------------------------------------------- -#include "mimalloc/types.h" -#include "mimalloc/track.h" +#include "types.h" +#include "track.h" #if (MI_DEBUG>0) #define mi_trace_message(...) 
_mi_trace_message(__VA_ARGS__) @@ -88,6 +88,7 @@ mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap void _mi_thread_done(mi_heap_t* heap); void _mi_thread_data_collect(void); +void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap); // os.c void _mi_os_init(void); // called from process init @@ -186,11 +187,13 @@ size_t _mi_bin_size(uint8_t bin); // for stats uint8_t _mi_bin(size_t size); // for stats // "heap.c" +void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag); void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); void _mi_heap_unsafe_destroy_all(void); +mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag); // "stats.c" void _mi_stats_done(mi_stats_t* stats); @@ -379,10 +382,10 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { } #else /* __builtin_umul_overflow is unavailable */ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + #define MI_MUL_COULD_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; // note: gcc/clang optimize this to directly check the overflow flag - return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); + return ((size >= MI_MUL_COULD_OVERFLOW || count >= MI_MUL_COULD_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); } #endif @@ -546,6 +549,7 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); mi_atomic_store_release(&page->xheap,(uintptr_t)heap); + if (heap != NULL) { 
page->heap_tag = heap->tag; } } // Thread free flag helpers diff --git a/third-party/mimalloc/include/mimalloc/prim.h b/third-party/mimalloc/include/mimalloc/prim.h index 4d813b7f..3f4574dd 100644 --- a/third-party/mimalloc/include/mimalloc/prim.h +++ b/third-party/mimalloc/include/mimalloc/prim.h @@ -26,7 +26,7 @@ typedef struct mi_os_mem_config_s { size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows) size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB) bool has_overcommit; // can we reserve more memory than can be actually committed? - bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) + bool has_partial_free; // can allocated blocks be freed partially? (true for mmap, false for VirtualAlloc) bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) } mi_os_mem_config_t; @@ -198,7 +198,7 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce tcb[slot] = value; #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 MI_UNUSED(ofs); - pthread_setspecific(slot, value); + pthread_setspecific(slot, value); #endif } @@ -208,13 +208,18 @@ static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexce // but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) // Nevertheless, it seems needed on older graviton platforms (see issue #851). // For now, we only enable this for specific platforms. 
-#if defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__) /* special case aarch64 for older gcc versions (issue #851) */ \ - && !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ +#if !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly ()*/ \ + && !defined(MI_LIBC_MUSL) \ && (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot () */ -#define MI_USE_BUILTIN_THREAD_POINTER 1 + #if (defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__)) /* aarch64 for older gcc versions (issue #851) */ \ + || (defined(__GNUC__) && (__GNUC__ >= 11) && defined(__x86_64__)) \ + || (defined(__clang_major__) && (__clang_major__ >= 14) && (defined(__aarch64__) || defined(__x86_64__))) + #define MI_USE_BUILTIN_THREAD_POINTER 1 + #endif #endif + // defined in `init.c`; do not use these directly extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from extern bool _mi_process_is_initialized; // has mi_process_init been called? @@ -222,7 +227,13 @@ extern bool _mi_process_is_initialized; // has mi_process_init been static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; // Get a unique id for the current thread. 
-#if defined(_WIN32) +#if defined(MI_PRIM_THREAD_ID) + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return MI_PRIM_THREAD_ID(); // used for example by CPython for a free threaded build (see python/cpython#115488) +} + +#elif defined(_WIN32) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN @@ -233,11 +244,11 @@ static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { return (uintptr_t)NtCurrentTeb(); } -#elif MI_USE_BUILTIN_THREAD_POINTER +#elif MI_USE_BUILTIN_THREAD_POINTER static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { // Works on most Unix based platforms with recent compilers - return (uintptr_t)__builtin_thread_pointer(); + return (uintptr_t)__builtin_thread_pointer(); } #elif defined(MI_HAS_TLS_SLOT) diff --git a/third-party/mimalloc/include/mimalloc/types.h b/third-party/mimalloc/include/mimalloc/types.h index cc807ee9..2fdde904 100644 --- a/third-party/mimalloc/include/mimalloc/types.h +++ b/third-party/mimalloc/include/mimalloc/types.h @@ -24,7 +24,7 @@ terms of the MIT license. 
A copy of the license can be found in the file #include // ptrdiff_t #include // uintptr_t, uint16_t, etc -#include "mimalloc/atomic.h" // _Atomic +#include "atomic.h" // _Atomic #ifdef _MSC_VER #pragma warning(disable:4214) // bitfield is not int @@ -319,6 +319,7 @@ typedef struct mi_page_s { mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) uint16_t used; // number of blocks in use (including blocks in `thread_free`) uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) + uint8_t heap_tag; // tag of the owning heap, used for separated heaps by object type // padding size_t block_size; // size available in each block (always `>0`) uint8_t* page_start; // start of the page area containing the blocks @@ -538,6 +539,7 @@ struct mi_heap_s { size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread bool no_reclaim; // `true` if this heap should not reclaim abandoned pages + uint8_t tag; // custom tag, can be used for separating heaps based on the object types mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") }; diff --git a/third-party/mimalloc/readme.md b/third-party/mimalloc/readme.md index 91974587..a0296b43 100644 --- a/third-party/mimalloc/readme.md +++ b/third-party/mimalloc/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.1.6` (2024-05-13). -Latest v1 tag: `v1.8.6` (2024-05-13). +Latest release tag: `v2.1.7` (2024-05-21). 
+Latest v1 tag: `v1.8.7` (2024-05-21). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -82,6 +82,8 @@ memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches + from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches. * 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. * 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size diff --git a/third-party/mimalloc/src/arena.c b/third-party/mimalloc/src/arena.c index 62bea78b..648ee844 100644 --- a/third-party/mimalloc/src/arena.c +++ b/third-party/mimalloc/src/arena.c @@ -51,12 +51,13 @@ typedef struct mi_arena_s { bool exclusive; // only allow allocations if specifically for this arena bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks - _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? 
(can be NULL for memory that cannot be decommitted) mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. } mi_arena_t; @@ -144,18 +145,19 @@ static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bit #define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*MI_KiB) // 8 KiB on 64-bit -static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; -static _Atomic(size_t) mi_arena_static_top; +static mi_decl_cache_align uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; // must be cache aligned, see issue #895 +static mi_decl_cache_align _Atomic(size_t) mi_arena_static_top; static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { *memid = _mi_memid_none(); if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; - if ((mi_atomic_load_relaxed(&mi_arena_static_top) + size) > MI_ARENA_STATIC_MAX) return NULL; + const size_t toplow = mi_atomic_load_relaxed(&mi_arena_static_top); + if ((toplow + size) > MI_ARENA_STATIC_MAX) return NULL; // try to claim space - if (alignment == 0) { alignment = 1; } + if (alignment < MI_MAX_ALIGN_SIZE) { alignment = MI_MAX_ALIGN_SIZE; } const size_t oversize = size + alignment - 1; - if (oversize > MI_ARENA_STATIC_MAX) return NULL; + if (toplow + oversize > MI_ARENA_STATIC_MAX) return NULL; const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize); size_t top = oldtop + oversize; if (top > MI_ARENA_STATIC_MAX) { @@ -169,7 +171,7 @@ static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* m memid->initially_zero = true; const size_t start = 
_mi_align_up(oldtop, alignment); uint8_t* const p = &mi_arena_static[start]; - _mi_memzero(p, size); + _mi_memzero_aligned(p, size); return p; } diff --git a/third-party/mimalloc/src/heap.c b/third-party/mimalloc/src/heap.c index 6c56edd6..e498fdb2 100644 --- a/third-party/mimalloc/src/heap.c +++ b/third-party/mimalloc/src/heap.c @@ -128,6 +128,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) const bool force = (collect >= MI_FORCE); _mi_deferred_free(heap, force); + // python/cpython#112532: we may be called from a thread that is not the owner of the heap + const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id()); + // note: never reclaim on collect but leave it to threads that need storage to reclaim const bool force_main = #ifdef NDEBUG @@ -135,7 +138,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) #else collect >= MI_FORCE #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim; + && is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim; if (force_main) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. @@ -164,7 +167,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_abandoned_collect(heap, collect == MI_FORCE /* force? */, &heap->tld->segments); // if forced, collect thread data cache on program-exit (or shared library unload) - if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { + if (force && is_main_thread && mi_heap_is_backing(heap)) { _mi_thread_data_collect(); // collect thread data cache } @@ -208,22 +211,33 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { - mi_heap_t* bheap = mi_heap_get_backing(); - mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
- if (heap == NULL) return NULL; +void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) { _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); - heap->tld = bheap->tld; - heap->thread_id = _mi_thread_id(); - heap->arena_id = arena_id; - _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->tld = tld; + heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; + heap->no_reclaim = noreclaim; + heap->tag = tag; + if (heap == tld->heap_backing) { + _mi_random_init(&heap->random); + } + else { + _mi_random_split(&tld->heap_backing->random, &heap->random); + } + heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); - heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe // push on the thread local heaps list heap->next = heap->tld->heaps; heap->tld->heaps = heap; +} + +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
+ if (heap == NULL) return NULL; + // don't reclaim abandoned pages or otherwise destroy is unsafe + _mi_heap_init(heap, bheap->tld, arena_id, true /* no reclaim */, 0 /* default tag */); return heap; } @@ -281,6 +295,18 @@ static void mi_heap_free(mi_heap_t* heap) { mi_free(heap); } +// return a heap on the same thread as `heap` specialized for the specified tag (if it exists) +mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag) { + if (heap->tag == tag) { + return heap; + } + for (mi_heap_t *curr = heap->tld->heaps; curr != NULL; curr = curr->next) { + if (curr->tag == tag) { + return curr; + } + } + return NULL; +} /* ----------------------------------------------------------- Heap destroy diff --git a/third-party/mimalloc/src/init.c b/third-party/mimalloc/src/init.c index 33161062..6f51ca89 100644 --- a/third-party/mimalloc/src/init.c +++ b/third-party/mimalloc/src/init.c @@ -25,6 +25,7 @@ const mi_page_t _mi_page_empty = { NULL, // local_free 0, // used 0, // block size shift + 0, // heap tag 0, // block_size NULL, // page_start #if (MI_PADDING || MI_ENCODE_FREELIST) @@ -33,9 +34,7 @@ const mi_page_t _mi_page_empty = { MI_ATOMIC_VAR_INIT(0), // xthread_free MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL - #if MI_INTPTR_SIZE==8 , { 0 } // padding - #endif }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) @@ -124,7 +123,8 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next - false, + false, // can reclaim + 0, // tag MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY }; @@ -170,6 +170,7 @@ mi_heap_t _mi_heap_main = { MI_BIN_FULL, 0, // page retired min/max NULL, // next heap false, // can reclaim + 0, // tag MI_SMALL_PAGES_EMPTY, MI_PAGE_QUEUES_EMPTY }; @@ -288,7 +289,7 @@ void _mi_thread_data_collect(void) { } // Initialize the thread local default heap, called from `mi_thread_init` -static bool _mi_heap_init(void) { +static bool _mi_thread_heap_init(void) { if 
(mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization @@ -304,26 +305,25 @@ static bool _mi_heap_init(void) { mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; - _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); - _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); - heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->keys[0] = _mi_heap_random_next(heap); - heap->keys[1] = _mi_heap_random_next(heap); - heap->tld = tld; - tld->heap_backing = heap; - tld->heaps = heap; - tld->segments.stats = &tld->stats; - tld->segments.os = &tld->os; - tld->os.stats = &tld->stats; - _mi_heap_set_default_direct(heap); + _mi_tld_init(tld, heap); // must be before `_mi_heap_init` + _mi_heap_init(heap, tld, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */); + _mi_heap_set_default_direct(heap); } return false; } +// initialize thread local data +void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { + _mi_memcpy_aligned(tld, &tld_empty, sizeof(mi_tld_t)); + tld->heap_backing = bheap; + tld->heaps = NULL; + tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; + tld->os.stats = &tld->stats; +} + // Free the thread local default heap (called from `mi_thread_done`) -static bool _mi_heap_done(mi_heap_t* heap) { +static bool _mi_thread_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap @@ -420,7 +420,7 @@ void mi_thread_init(void) mi_attr_noexcept // initialize the thread local default heap // (this will call `_mi_heap_set_default_direct` and thus set the // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) - if (_mi_heap_init()) return; // returns true if already initialized + if (_mi_thread_heap_init()) return; // returns true if already 
initialized _mi_stat_increase(&_mi_stats_main.threads, 1); mi_atomic_increment_relaxed(&thread_count); @@ -452,7 +452,7 @@ void _mi_thread_done(mi_heap_t* heap) if (heap->thread_id != _mi_thread_id()) return; // abandon the thread local heap - if (_mi_heap_done(heap)) return; // returns true if already ran + if (_mi_thread_heap_done(heap)) return; // returns true if already ran } void _mi_heap_set_default_direct(mi_heap_t* heap) { diff --git a/third-party/mimalloc/src/options.c b/third-party/mimalloc/src/options.c index fba90761..a62727dd 100644 --- a/third-party/mimalloc/src/options.c +++ b/third-party/mimalloc/src/options.c @@ -65,7 +65,7 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N - { 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve OS memory in advance + { 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`) { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates @@ -79,19 +79,20 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. 
{ 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments per try. + { 32, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 32, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try. { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! #if (MI_INTPTR_SIZE>4) - { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time + { 1024L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) #else - { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, + { 128L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // =128MiB on 32-bit #endif { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free { 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) + { 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. 
}; static void mi_option_init(mi_option_desc_t* desc); @@ -135,8 +136,12 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { mi_assert_internal(mi_option_has_size_in_kib(option)); - long x = mi_option_get(option); - return (x < 0 ? 0 : (size_t)x * MI_KiB); + const long x = mi_option_get(option); + size_t size = (x < 0 ? 0 : (size_t)x); + if (mi_option_has_size_in_kib(option)) { + size *= MI_KiB; + } + return size; } void mi_option_set(mi_option_t option, long value) { @@ -479,14 +484,20 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) { - // this option is interpreted in KiB to prevent overflow of `long` + if (mi_option_has_size_in_kib(desc->option)) { + // this option is interpreted in KiB to prevent overflow of `long` for large allocations + // (long is 32-bit on 64-bit windows, which allows for 4TiB max.) + size_t size = (value < 0 ? 0 : (size_t)value); + bool overflow = false; if (*end == 'K') { end++; } - else if (*end == 'M') { value *= MI_KiB; end++; } - else if (*end == 'G') { value *= MI_MiB; end++; } - else { value = (value + MI_KiB - 1) / MI_KiB; } - if (end[0] == 'I' && end[1] == 'B') { end += 2; } - else if (*end == 'B') { end++; } + else if (*end == 'M') { overflow = mi_mul_overflow(size,MI_KiB,&size); end++; } + else if (*end == 'G') { overflow = mi_mul_overflow(size,MI_MiB,&size); end++; } + else if (*end == 'T') { overflow = mi_mul_overflow(size,MI_GiB,&size); end++; } + else { size = (size + MI_KiB - 1) / MI_KiB; } + if (end[0] == 'I' && end[1] == 'B') { end += 2; } // KiB, MiB, GiB, TiB + else if (*end == 'B') { end++; } // Kb, Mb, Gb, Tb + if (overflow || size > MI_MAX_ALLOC_SIZE) { size = (MI_MAX_ALLOC_SIZE / MI_KiB); } + value = (size > LONG_MAX ? 
LONG_MAX : (long)size); } if (*end == 0) { desc->value = value; diff --git a/third-party/mimalloc/src/os.c b/third-party/mimalloc/src/os.c index dda6844c..ce104273 100644 --- a/third-party/mimalloc/src/os.c +++ b/third-party/mimalloc/src/os.c @@ -11,9 +11,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ----------------------------------------------------------- - Initialization. - On windows initializes support for aligned allocation and - large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). + Initialization. ----------------------------------------------------------- */ static mi_os_mem_config_t mi_os_mem_config = { @@ -21,7 +19,7 @@ static mi_os_mem_config_t mi_os_mem_config = { 0, // large page size (usually 2MiB) 4096, // allocation granularity true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) - false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + false, // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) }; @@ -239,7 +237,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; - if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block + if (!mi_os_mem_config.has_partial_free) { // win32 virtualAlloc cannot free parts of an allocated block // over-allocate uncommitted (virtual) memory p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? 
*/, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; @@ -260,7 +258,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; - // and selectively unmap parts around the over-allocated area. (noop on sbrk) + // and selectively unmap parts around the over-allocated area. void* aligned_p = mi_align_up_ptr(p, alignment); size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; size_t mid_size = _mi_align_up(size, _mi_os_page_size()); @@ -268,7 +266,7 @@ static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } - // we can return the aligned pointer on `mmap` (and sbrk) systems + // we can return the aligned pointer on `mmap` systems p = aligned_p; *base = aligned_p; // since we freed the pre part, `*base == p`. 
} diff --git a/third-party/mimalloc/src/prim/emscripten/prim.c b/third-party/mimalloc/src/prim/emscripten/prim.c index 1f60a1bb..f3797c9e 100644 --- a/third-party/mimalloc/src/prim/emscripten/prim.c +++ b/third-party/mimalloc/src/prim/emscripten/prim.c @@ -51,7 +51,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config) { config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB config->alloc_granularity = 16; config->has_overcommit = false; - config->must_free_whole = true; + config->has_partial_free = false; config->has_virtual_reserve = false; } diff --git a/third-party/mimalloc/src/prim/osx/alloc-override-zone.c b/third-party/mimalloc/src/prim/osx/alloc-override-zone.c index 9a317750..1515b886 100644 --- a/third-party/mimalloc/src/prim/osx/alloc-override-zone.c +++ b/third-party/mimalloc/src/prim/osx/alloc-override-zone.c @@ -422,6 +422,7 @@ __attribute__((constructor(0))) #else __attribute__((constructor)) // seems not supported by g++-11 on the M1 #endif +__attribute__((used)) static void _mi_macos_override_malloc(void) { malloc_zone_t* purgeable_zone = NULL; diff --git a/third-party/mimalloc/src/prim/unix/prim.c b/third-party/mimalloc/src/prim/unix/prim.c index 7e4e8f7b..90a4aac2 100644 --- a/third-party/mimalloc/src/prim/unix/prim.c +++ b/third-party/mimalloc/src/prim/unix/prim.c @@ -29,7 +29,7 @@ terms of the MIT license. A copy of the license can be found in the file #include // sysconf #include // open, close, read, access #include - + #if defined(__linux__) #include #if defined(MI_NO_THP) @@ -58,7 +58,7 @@ terms of the MIT license. A copy of the license can be found in the file #include #endif -#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) && !defined(__OpenBSD__) && !defined(__sun) +#if defined(__linux__) || defined(__FreeBSD__) #define MI_HAS_SYSCALL_H #include #endif @@ -66,39 +66,38 @@ terms of the MIT license. 
A copy of the license can be found in the file //------------------------------------------------------------------------------------ // Use syscalls for some primitives to allow for libraries that override open/read/close etc. -// and do allocation themselves; using syscalls prevents recursion when mimalloc is +// and do allocation themselves; using syscalls prevents recursion when mimalloc is // still initializing (issue #713) +// Declare inline to avoid unused function warnings. //------------------------------------------------------------------------------------ - #if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) -static int mi_prim_open(const char* fpath, int open_flags) { +static inline int mi_prim_open(const char* fpath, int open_flags) { return syscall(SYS_open,fpath,open_flags,0); } -static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { +static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { return syscall(SYS_read,fd,buf,bufsize); } -static int mi_prim_close(int fd) { +static inline int mi_prim_close(int fd) { return syscall(SYS_close,fd); } -static int mi_prim_access(const char *fpath, int mode) { +static inline int mi_prim_access(const char *fpath, int mode) { return syscall(SYS_access,fpath,mode); } -#elif !defined(__sun) && \ - (!defined(__APPLE__) || (MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_X_VERSION_10_7)) // avoid unused warnings on macOS and Solaris +#else -static int mi_prim_open(const char* fpath, int open_flags) { +static inline int mi_prim_open(const char* fpath, int open_flags) { return open(fpath,open_flags); } -static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { +static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { return read(fd,buf,bufsize); } -static int mi_prim_close(int fd) { +static inline int mi_prim_close(int fd) { return close(fd); } -static int mi_prim_access(const char *fpath, int mode) { +static inline 
int mi_prim_access(const char *fpath, int mode) { return access(fpath,mode); } @@ -131,12 +130,12 @@ static bool unix_detect_overcommit(void) { os_overcommit = (val != 0); } #else - // default: overcommit is true + // default: overcommit is true #endif return os_overcommit; } -void _mi_prim_mem_init( mi_os_mem_config_t* config ) +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { long psize = sysconf(_SC_PAGESIZE); if (psize > 0) { @@ -145,7 +144,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) } config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? config->has_overcommit = unix_detect_overcommit(); - config->must_free_whole = false; // mmap can free in parts + config->has_partial_free = true; // mmap can free in parts config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) // disable transparent huge pages for this process? @@ -198,12 +197,12 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p size_t n = mi_bsr(try_alignment); if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); - if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; - _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); + _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #elif defined(MAP_ALIGN) // Solaris @@ -219,16 +218,16 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p void* 
hint = _mi_os_get_aligned_hint(try_alignment, size); if (hint != NULL) { p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? int err = 0; #else int err = errno; #endif - _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + _mi_trace_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); } if (p!=MAP_FAILED) return p; - // fall back to regular mmap + // fall back to regular mmap } } #endif @@ -357,9 +356,9 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); - + *is_zero = true; - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); return (*addr != NULL ? 0 : errno); } @@ -387,19 +386,19 @@ int _mi_prim_commit(void* start, size_t size, bool* is_zero) { // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but // we sometimes call commit on a range with still partially committed // memory and `mprotect` does not zero the range. 
- *is_zero = false; + *is_zero = false; int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); - if (err != 0) { - err = errno; + if (err != 0) { + err = errno; unix_mprotect_hint(err); } return err; } int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { - int err = 0; + int err = 0; // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - err = unix_madvise(start, size, MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); #if !MI_DEBUG && !MI_SECURE *needs_recommit = false; #else @@ -411,15 +410,15 @@ int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { *needs_recommit = true; const int fd = unix_mmap_fd(); void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } + if (p != start) { err = errno; } */ return err; } int _mi_prim_reset(void* start, size_t size) { - // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it // will not reduce the `rss` stats in tools like `top` even though the memory is available - // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by + // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by // default `MADV_DONTNEED` is used though. #if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); @@ -439,7 +438,7 @@ int _mi_prim_reset(void* start, size_t size) { int _mi_prim_protect(void* start, size_t size, bool protect) { int err = mprotect(start, size, protect ? 
PROT_NONE : (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } + if (err != 0) { err = errno; } unix_mprotect_hint(err); return err; } @@ -480,7 +479,7 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bo if (err != 0) { err = errno; _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); - } + } } return (*addr != NULL ? 0 : errno); } @@ -595,9 +594,9 @@ mi_msecs_t _mi_prim_clock_now(void) { // low resolution timer mi_msecs_t _mi_prim_clock_now(void) { #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) - return (mi_msecs_t)clock(); + return (mi_msecs_t)clock(); #elif (CLOCKS_PER_SEC < 1000) - return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); #else return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); #endif @@ -637,7 +636,7 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) pinfo->stime = timeval_secs(&rusage.ru_stime); #if !defined(__HAIKU__) pinfo->page_faults = rusage.ru_majflt; -#endif +#endif #if defined(__HAIKU__) // Haiku does not have (yet?) a way to // get these stats per process @@ -764,7 +763,7 @@ bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { bool _mi_prim_random_buf(void* buf, size_t buf_len) { // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf // may fail silently on macOS. 
See PR #390, and - return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); } #elif defined(__ANDROID__) || defined(__DragonFly__) || \ @@ -772,7 +771,6 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { defined(__sun) || \ (defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7)) -#include bool _mi_prim_random_buf(void* buf, size_t buf_len) { arc4random_buf(buf, buf_len); return true; @@ -863,7 +861,7 @@ void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { } } -#else +#else void _mi_prim_thread_init_auto_done(void) { // nothing diff --git a/third-party/mimalloc/src/prim/wasi/prim.c b/third-party/mimalloc/src/prim/wasi/prim.c index f74acd2a..e95f67f5 100644 --- a/third-party/mimalloc/src/prim/wasi/prim.c +++ b/third-party/mimalloc/src/prim/wasi/prim.c @@ -23,7 +23,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB config->alloc_granularity = 16; config->has_overcommit = false; - config->must_free_whole = true; + config->has_partial_free = false; config->has_virtual_reserve = false; } diff --git a/third-party/mimalloc/src/prim/windows/prim.c b/third-party/mimalloc/src/prim/windows/prim.c index 2dd7c602..5074ad4c 100644 --- a/third-party/mimalloc/src/prim/windows/prim.c +++ b/third-party/mimalloc/src/prim/windows/prim.c @@ -112,7 +112,7 @@ static bool win_enable_large_os_pages(size_t* large_page_size) void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->has_overcommit = false; - config->must_free_whole = true; + config->has_partial_free = false; config->has_virtual_reserve = true; // get the page size SYSTEM_INFO si; @@ -178,7 +178,7 @@ int _mi_prim_free(void* addr, size_t size ) { // VirtualAlloc //--------------------------------------------- -static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { +static void* 
win_virtual_alloc_prim_once(void* addr, size_t size, size_t try_alignment, DWORD flags) { #if (MI_INTPTR_SIZE >= 8) // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations if (addr == NULL) { @@ -200,13 +200,53 @@ static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignmen param.Arg.Pointer = &reqs; void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); if (p != NULL) return p; - _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); + _mi_warning_message("unable to allocate aligned OS memory (0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); // fall through on error } // last resort return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } +static bool win_is_out_of_memory_error(DWORD err) { + switch (err) { + case ERROR_COMMITMENT_MINIMUM: + case ERROR_COMMITMENT_LIMIT: + case ERROR_PAGEFILE_QUOTA: + case ERROR_NOT_ENOUGH_MEMORY: + return true; + default: + return false; + } +} + +static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { + long max_retry_msecs = mi_option_get_clamp(mi_option_retry_on_oom, 0, 2000); // at most 2 seconds + if (max_retry_msecs == 1) { max_retry_msecs = 100; } // if one sets the option to "true" + for (long tries = 1; tries <= 10; tries++) { // try at most 10 times (=2200ms) + void* p = win_virtual_alloc_prim_once(addr, size, try_alignment, flags); + if (p != NULL) { + // success, return the address + return p; + } + else if (max_retry_msecs > 0 && (try_alignment <= 2*MI_SEGMENT_ALIGN) && + (flags&MEM_COMMIT) != 0 && (flags&MEM_LARGE_PAGES) == 0 && + win_is_out_of_memory_error(GetLastError())) { + // if committing regular memory and being out-of-memory, + // keep trying for a bit in case 
memory frees up after all. See issue #894 + _mi_warning_message("out-of-memory on OS allocation, try again... (attempt %lu, 0x%zx bytes, error code: 0x%x, address: %p, alignment: 0x%zx, flags: 0x%x)\n", tries, size, GetLastError(), addr, try_alignment, flags); + long sleep_msecs = tries*40; // increasing waits + if (sleep_msecs > max_retry_msecs) { sleep_msecs = max_retry_msecs; } + max_retry_msecs -= sleep_msecs; + Sleep(sleep_msecs); + } + else { + // otherwise return with an error + break; + } + } + return NULL; +} + static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { mi_assert_internal(!(large_only && !allow_large)); static _Atomic(size_t) large_page_try_ok; // = 0; @@ -572,6 +612,7 @@ bool _mi_prim_random_buf(void* buf, size_t buf_len) { #if !defined(MI_SHARED_LIB) // use thread local storage keys to detect thread ending +// note: another design could be to use special linker sections (see issue #869) #include #if (_WIN32_WINNT < 0x600) // before Windows Vista WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); diff --git a/third-party/mimalloc/src/segment.c b/third-party/mimalloc/src/segment.c index 9ac22f15..4e4dcb80 100644 --- a/third-party/mimalloc/src/segment.c +++ b/third-party/mimalloc/src/segment.c @@ -347,7 +347,7 @@ uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* pa } -static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, size_t* info_slices) { +static size_t mi_segment_calculate_slices(size_t required, size_t* info_slices) { size_t page_size = _mi_os_page_size(); size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size); size_t guardsize = 0; @@ -361,7 +361,6 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz } } - if (pre_size != NULL) *pre_size = isize; isize = _mi_align_up(isize + guardsize, MI_SEGMENT_SLICE_SIZE); if (info_slices != NULL) 
*info_slices = isize / MI_SEGMENT_SLICE_SIZE; size_t segment_size = (required==0 ? MI_SEGMENT_SIZE : _mi_align_up( required + isize + guardsize, MI_SEGMENT_SLICE_SIZE) ); @@ -624,7 +623,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size mi_assert_internal(slice->slice_count == slice_count); // no overflow? slice->slice_offset = 0; if (slice_count > 1) { - mi_slice_t* last = &segment->slices[slice_index + slice_count - 1]; + mi_slice_t* last = slice + slice_count - 1; + mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment); + if (last > end) { last = end; } last->slice_count = 0; last->slice_offset = (uint32_t)(sizeof(mi_page_t)*(slice_count - 1)); last->block_size = 0; @@ -808,7 +809,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren ----------------------------------------------------------- */ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id, - size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, + size_t* psegment_slices, size_t* pinfo_slices, bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { @@ -825,7 +826,7 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN ); const size_t extra = align_offset - info_size; // recalculate due to potential guard pages - *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); + *psegment_slices = mi_segment_calculate_slices(required + extra, pinfo_slices); mi_assert_internal(*psegment_slices > 0 && *psegment_slices <= UINT32_MAX); } @@ -874,8 +875,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // calculate needed sizes first size_t info_slices; - size_t pre_size; - size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices); + size_t segment_slices = 
mi_segment_calculate_slices(required, &info_slices); mi_assert_internal(segment_slices > 0 && segment_slices <= UINT32_MAX); // Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little) @@ -887,7 +887,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); + &segment_slices, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; // zero the segment info? -- not always needed as it may be zero initialized from the OS @@ -915,8 +915,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi if (MI_SECURE>0) { // in secure mode, we set up a protected page in between the segment info // and the page data, and at the end of the segment. - size_t os_pagesize = _mi_os_page_size(); - mi_assert_internal(mi_segment_info_size(segment) - os_pagesize >= pre_size); + size_t os_pagesize = _mi_os_page_size(); _mi_os_protect((uint8_t*)segment + mi_segment_info_size(segment) - os_pagesize, os_pagesize); uint8_t* end = (uint8_t*)segment + mi_segment_size(segment) - os_pagesize; mi_segment_ensure_committed(segment, end, os_pagesize, tld->stats); @@ -1007,11 +1006,13 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld _mi_os_reset(start, psize, tld->stats); } - // zero the page data, but not the segment fields + // zero the page data, but not the segment fields and heap tag page->is_zero_init = false; + uint8_t heap_tag = page->heap_tag; ptrdiff_t ofs = offsetof(mi_page_t, capacity); _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); page->block_size = 1; + page->heap_tag = heap_tag; // and free it mi_slice_t* slice = mi_segment_span_free_coalesce(mi_page_to_slice(page), tld); @@ -1212,8 +1213,13 @@ static mi_segment_t* 
mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, mi_assert_internal(page->next == NULL && page->prev==NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); segment->abandoned--; - // set the heap again and allow delayed free again - mi_page_set_heap(page, heap); + // set the heap again and allow heap thread delayed free again. + mi_heap_t* target_heap = _mi_heap_by_tag(heap, page->heap_tag); // allow custom heaps to separate objects + if (target_heap == NULL) { + target_heap = heap; + _mi_error_message(EINVAL, "page with tag %u cannot be reclaimed by a heap with the same tag (using %u instead)\n", page->heap_tag, heap->tag ); + } + mi_page_set_heap(page, target_heap); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { @@ -1222,8 +1228,8 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } else { // otherwise reclaim it into the heap - _mi_page_reclaim(heap, page); - if (requested_block_size == mi_page_block_size(page) && mi_page_has_any_available(page)) { + _mi_page_reclaim(target_heap, page); + if (requested_block_size == mi_page_block_size(page) && mi_page_has_any_available(page) && heap == target_heap) { if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } } } diff --git a/third-party/tbb/.bazelversion b/third-party/tbb/.bazelversion index 09b254e9..21c8c7b4 100644 --- a/third-party/tbb/.bazelversion +++ b/third-party/tbb/.bazelversion @@ -1 +1 @@ -6.0.0 +7.1.1 diff --git a/third-party/tbb/.github/CODEOWNERS b/third-party/tbb/.github/CODEOWNERS new file mode 100644 index 00000000..31805797 --- /dev/null +++ b/third-party/tbb/.github/CODEOWNERS @@ -0,0 +1,7 @@ +# Lines starting with '#' are comments. +# Each line is a file pattern followed by one or more owners. 
+ +# More details are here: https://help.github.com/articles/about-codeowners/ + +src/tbbmalloc @ldorau @lplewa @kfilipek +src/tbbmalloc_proxy @ldorau @lplewa @kfilipek diff --git a/third-party/tbb/.github/workflows/ci.yml b/third-party/tbb/.github/workflows/ci.yml index a6d710f8..a65de622 100644 --- a/third-party/tbb/.github/workflows/ci.yml +++ b/third-party/tbb/.github/workflows/ci.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023 Intel Corporation +# Copyright (c) 2021-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,6 +25,8 @@ on: - synchronize - reopened +permissions: read-all + env: BUILD_CONCURRENCY: 2 MACOS_BUILD_CONCURRENCY: 3 @@ -57,7 +59,7 @@ jobs: needs: [codespell] env: BUILD_TYPE: oss - runs-on: [ubuntu-20.04] + runs-on: [ubuntu-22.04] timeout-minutes: 10 steps: - uses: actions/checkout@v2 @@ -80,6 +82,10 @@ jobs: pages: if: ${{ github.ref == 'refs/heads/master' }} + permissions: + contents: write + pages: write + id-token: write runs-on: ubuntu-latest needs: [documentation] steps: @@ -140,7 +146,7 @@ jobs: ctest -R python_test --output-on-failure --timeout ${TEST_TIMEOUT} linux-testing: - name: ${{ matrix.os }}_${{ matrix.cxx_compiler }}_cxx${{ matrix.std }}_${{ matrix.build_type }}_preview=${{ matrix.preview }} + name: ${{ matrix.os }}_${{ matrix.cxx_compiler }}_cxx${{ matrix.std }}_${{ matrix.build_type }}_preview=${{ matrix.preview }}${{ matrix.cmake_static }} runs-on: ['${{ matrix.os }}'] timeout-minutes: 45 strategy: @@ -165,6 +171,13 @@ jobs: std: 20 build_type: debug preview: 'ON' + - os: ubuntu-22.04 + c_compiler: gcc-11 + cxx_compiler: g++-11 + std: 20 + build_type: release + preview: 'ON' + cmake_static: -DBUILD_SHARED_LIBS=OFF steps: - uses: actions/checkout@v2 - name: Run testing @@ -172,13 +185,13 @@ jobs: run: | set -x mkdir build && cd build - cmake -DCMAKE_CXX_STANDARD=${{ matrix.std }} -DCMAKE_BUILD_TYPE=${{ 
matrix.build_type }} \ + cmake -DCMAKE_CXX_STANDARD=${{ matrix.std }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.cmake_static }} \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} -DTBB_CPF=${{ matrix.preview }} .. make VERBOSE=1 -j${BUILD_CONCURRENCY} ctest --timeout ${TEST_TIMEOUT} --output-on-failure macos-testing: - name: ${{ matrix.os }}_${{ matrix.cxx_compiler }}_cxx${{ matrix.std }}_${{ matrix.build_type }}_preview=${{ matrix.preview }} + name: ${{ matrix.os }}_${{ matrix.cxx_compiler }}_cxx${{ matrix.std }}_${{ matrix.build_type }}_preview=${{ matrix.preview }}${{ matrix.cmake_static }} runs-on: ['${{ matrix.os }}'] timeout-minutes: 45 strategy: @@ -191,6 +204,13 @@ jobs: std: 14 build_type: relwithdebinfo preview: 'ON' + - os: macos-13 + c_compiler: clang + cxx_compiler: clang++ + std: 20 + build_type: release + preview: 'ON' + cmake_static: -DBUILD_SHARED_LIBS=OFF steps: - uses: actions/checkout@v2 - name: Run testing @@ -198,7 +218,7 @@ jobs: run: | set -x mkdir build && cd build - cmake -DCMAKE_CXX_STANDARD=${{ matrix.std }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + cmake -DCMAKE_CXX_STANDARD=${{ matrix.std }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ${{ matrix.cmake_static }} \ -DCMAKE_CXX_COMPILER=${{ matrix.cxx_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} -DTBB_CPF=${{ matrix.preview }} .. 
make VERBOSE=1 -j${MACOS_BUILD_CONCURRENCY} ctest --timeout ${TEST_TIMEOUT} --output-on-failure @@ -219,6 +239,15 @@ jobs: build_type: relwithdebinfo preview: 'ON' job_name: windows_cl2019_cxx14_relwithdebinfo_preview=ON + - os: windows-2019 + generator: Visual Studio 16 2019 + c_compiler: cl + cxx_compiler: cl + std: 20 + build_type: release + preview: 'ON' + job_name: windows_cl2019_cxx20_release_preview=ON-DBUILD_SHARED_LIBS=OFF + cmake_static: -DBUILD_SHARED_LIBS=OFF - os: windows-2022 generator: Visual Studio 17 2022 c_compiler: cl @@ -233,7 +262,7 @@ jobs: run: | mkdir build cd build - cmake -G "${{ matrix.generator }}" -A x64 -DCMAKE_CXX_STANDARD=${{ matrix.std }} ` + cmake -G "${{ matrix.generator }}" -A x64 -DCMAKE_CXX_STANDARD=${{ matrix.std }} ${{ matrix.cmake_static }} ` -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_COMPILER=${{ matrix.cxx_compiler }} ` -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} -DTBB_CPF=${{ matrix.preview }} .. cmake --build . --config ${{ matrix.build_type }} -j -v diff --git a/third-party/tbb/.github/workflows/issue_labeler.yml b/third-party/tbb/.github/workflows/issue_labeler.yml index 418d7bac..80591aa9 100644 --- a/third-party/tbb/.github/workflows/issue_labeler.yml +++ b/third-party/tbb/.github/workflows/issue_labeler.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2023-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,6 +19,8 @@ on: pull_request: types: [opened, edited] +permissions: read-all + jobs: triage: runs-on: ubuntu-latest diff --git a/third-party/tbb/.github/workflows/labeler.yml b/third-party/tbb/.github/workflows/labeler.yml index 8dbb0962..36812ebd 100644 --- a/third-party/tbb/.github/workflows/labeler.yml +++ b/third-party/tbb/.github/workflows/labeler.yml @@ -1,4 +1,4 @@ -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2023-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ name: "Pull Request Labeler" on: - pull_request_target +permissions: read-all + jobs: triage: permissions: diff --git a/third-party/tbb/BUILD.bazel b/third-party/tbb/BUILD.bazel index 3881d684..34f98eba 100644 --- a/third-party/tbb/BUILD.bazel +++ b/third-party/tbb/BUILD.bazel @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022 Intel Corporation +# Copyright (c) 2021-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -116,3 +116,16 @@ cc_library( ":tbbmalloc", ], ) + +cc_test( + name = "test_task", + srcs = [ + "test/tbb/test_task.cpp", + ] + glob([ + "test/common/*.h", + ]), + includes = ["test"], + deps = [ + ":tbb", + ], +) diff --git a/third-party/tbb/CMakeLists.txt b/third-party/tbb/CMakeLists.txt index 16ee29ed..19232a99 100644 --- a/third-party/tbb/CMakeLists.txt +++ b/third-party/tbb/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,10 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) # Enable CMake policies +if (POLICY CMP0068) + # RPATH settings do not affect install_name on macOS since CMake 3.9 + cmake_policy(SET CMP0068 NEW) +endif() + if (POLICY CMP0091) # The NEW behavior for this policy is to not place MSVC runtime library flags in the default # CMAKE__FLAGS_ cache entries and use CMAKE_MSVC_RUNTIME_LIBRARY abstraction instead. @@ -38,12 +43,6 @@ if (APPLE) endif() endif() -# Until CMake 3.4.0 FindThreads.cmake requires C language enabled. -# Enable C language before CXX to avoid possible override of CMAKE_SIZEOF_VOID_P. -if (CMAKE_VERSION VERSION_LESS 3.4) - enable_language(C) -endif() - file(READ include/oneapi/tbb/version.h _tbb_version_info) string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" _tbb_ver_major "${_tbb_version_info}") string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" _tbb_ver_minor "${_tbb_version_info}") @@ -104,9 +103,13 @@ option(TBBMALLOC_BUILD "Enable tbbmalloc build" ON) cmake_dependent_option(TBBMALLOC_PROXY_BUILD "Enable tbbmalloc_proxy build" ON "TBBMALLOC_BUILD" OFF) option(TBB_CPF "Enable preview features of the library" OFF) option(TBB_FIND_PACKAGE "Enable search for external oneTBB using find_package instead of build from sources" OFF) -option(TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH "Disable HWLOC automatic search by pkg-config tool" OFF) +option(TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH "Disable HWLOC automatic search by pkg-config tool" ${CMAKE_CROSSCOMPILING}) option(TBB_ENABLE_IPO "Enable Interprocedural Optimization (IPO) during the compilation" ON) option(TBB_FUZZ_TESTING "Enable fuzz testing" OFF) +option(TBB_INSTALL "Enable installation" ON) +if(APPLE) +option(TBB_BUILD_APPLE_FRAMEWORKS "Build as Apple Frameworks" OFF) +endif() if (NOT DEFINED BUILD_SHARED_LIBS) set(BUILD_SHARED_LIBS ON) @@ -194,7 +197,7 @@ endif() # ------------------------------------------------------------------- # Common dependencies 
#force -pthread during compilation for Emscripten -if (EMSCRIPTEN) +if (EMSCRIPTEN AND NOT EMSCRIPTEN_WITHOUT_PTHREAD) set(THREADS_HAVE_PTHREAD_ARG TRUE) endif() @@ -230,7 +233,7 @@ else() message(WARNING "TBB compiler settings not found ${TBB_COMPILER_SETTINGS_FILE}") endif() -if (TBB_FIND_PACKAGE OR TBB_DIR) +if (TBB_FIND_PACKAGE AND TBB_DIR) # Allow specifying external TBB to test with. # Do not add main targets and installation instructions in that case. message(STATUS "Using external TBB for testing") @@ -250,34 +253,39 @@ else() else() add_subdirectory(src/tbbbind) endif() + if (TBB_INSTALL) + # ------------------------------------------------------------------- + # Installation instructions + include(CMakePackageConfigHelpers) + + install(DIRECTORY include/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT devel) + + install(EXPORT ${PROJECT_NAME}Targets + NAMESPACE TBB:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + COMPONENT devel) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake + "include(\${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}Targets.cmake)\n") + if (NOT BUILD_SHARED_LIBS) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake + "include(CMakeFindDependencyMacro)\nfind_dependency(Threads)\n") + endif() + + write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + COMPATIBILITY AnyNewerVersion) - # ------------------------------------------------------------------- - # Installation instructions - include(CMakePackageConfigHelpers) - - install(DIRECTORY include/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} - COMPONENT devel) - - install(EXPORT ${PROJECT_NAME}Targets - NAMESPACE TBB:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} - COMPONENT devel) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake - "include(\${CMAKE_CURRENT_LIST_DIR}/${PROJECT_NAME}Targets.cmake)\n") - - 
write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" - COMPATIBILITY AnyNewerVersion) - - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" - "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} - COMPONENT devel) - - install(FILES "README.md" - DESTINATION ${CMAKE_INSTALL_DOCDIR} - COMPONENT devel) - # ------------------------------------------------------------------- + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + COMPONENT devel) + + install(FILES "README.md" + DESTINATION ${CMAKE_INSTALL_DOCDIR} + COMPONENT devel) + # ------------------------------------------------------------------- + endif() endif() if (TBB_TEST) diff --git a/third-party/tbb/CONTRIBUTING.md b/third-party/tbb/CONTRIBUTING.md index c8b43708..3048b211 100644 --- a/third-party/tbb/CONTRIBUTING.md +++ b/third-party/tbb/CONTRIBUTING.md @@ -29,11 +29,6 @@ The DCO is an attestation attached to every contribution made by every developer As a contributor, you’ll want to be familiar with the oneTBB project and the repository layout. You should also know how to use it as explained in the [oneTBB documentation](https://oneapi-src.github.io/oneTBB/) and how to set up your build development environment to configure, build, and test oneTBB as explained in the [oneTBB Build System Description](cmake/README.md). -## Issues -If you face a problem, first check out open [oneTBB GitHub issues](https://github.com/oneapi-src/oneTBB/issues) to see if the issue you’d like to address is already reported. You may find users that have encountered the bug you’re finding or have similar ideas for changes or additions. - -You can use issues to report a problem, make a feature request, or add comments on an existing issue. 
- ## Pull Requests You can find all [open oneTBB pull requests](https://github.com/oneapi-src/oneTBB/pulls) on GitHub. diff --git a/third-party/tbb/INSTALL.md b/third-party/tbb/INSTALL.md index 3c63c9fd..0ac95f87 100644 --- a/third-party/tbb/INSTALL.md +++ b/third-party/tbb/INSTALL.md @@ -61,7 +61,7 @@ You can use the ``install`` components for partial installation. The following install components are supported: - `runtime` - oneTBB runtime package (core shared libraries and `.dll` files on Windows* OS). - `devel` - oneTBB development package (header files, CMake integration files, library symbolic links, and `.lib` files on Windows* OS). -- `tbb4py` - [oneTBB Module for Python](#onetbb-python-module-support). +- `tbb4py` - [oneTBB Module for Python](https://github.com/oneapi-src/oneTBB/blob/master/python/README.md). If you want to install specific components after configuration and build, run: diff --git a/third-party/tbb/MODULE.bazel b/third-party/tbb/MODULE.bazel new file mode 100644 index 00000000..cc6698f0 --- /dev/null +++ b/third-party/tbb/MODULE.bazel @@ -0,0 +1,24 @@ +# Copyright (c) 2021-2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DISCLAIMER: Bazel support is community-based. The maintainers do not +# use Bazel internally. The Bazel build can have security risks or +# optimization gaps. 
+ +module( + name = "onetbb", + compatibility_level = 1, +) + +bazel_dep(name = "platforms", version = "0.0.9") diff --git a/third-party/tbb/README.md b/third-party/tbb/README.md index b96e1fb0..f2bc0a0a 100644 --- a/third-party/tbb/README.md +++ b/third-party/tbb/README.md @@ -23,7 +23,8 @@ oneTBB is a part of [oneAPI](https://oneapi.io). The current branch implements v > **_NOTE:_** Threading Building Blocks (TBB) is now called oneAPI Threading Building Blocks (oneTBB) to highlight that the tool is a part of the oneAPI ecosystem. ## Release Information -Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md). + +See [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md). ## Documentation * [oneTBB Specification](https://spec.oneapi.com/versions/latest/elements/oneTBB/source/nested-index.html) @@ -39,7 +40,7 @@ Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQU See [Installation from Sources](INSTALL.md) to learn how to install oneTBB. ## Support -Please report issues and suggestions via [GitHub issues](https://github.com/oneapi-src/oneTBB/issues). See our [documentation](./CONTRIBUTING.md##Issues) to learn how to work with them. +See our [documentation](./SUPPORT.md) to learn how to request help. ## How to Contribute We welcome community contributions, so check our [Contributing Guidelines](CONTRIBUTING.md) @@ -49,7 +50,6 @@ to learn more. oneAPI Threading Building Blocks is licensed under [Apache License, Version 2.0](LICENSE.txt). By its terms, contributions submitted to the project are also done under that license. 
- ## Engineering team contacts * [Email us.](mailto:inteltbbdevelopers@intel.com) diff --git a/third-party/tbb/RELEASE_NOTES.md b/third-party/tbb/RELEASE_NOTES.md index 57258416..c9b8e971 100644 --- a/third-party/tbb/RELEASE_NOTES.md +++ b/third-party/tbb/RELEASE_NOTES.md @@ -18,26 +18,25 @@ This document contains changes of oneTBB compared to the last release. ## Table of Contents -- [New Features](#new-features) - [Known Limitations](#known-limitations) - [Fixed Issues](#fixed-issues) -## :tada: New Features -- Since C++17, parallel algorithms and Flow Graph nodes are allowed to accept pointers to the member functions and member objects as the user-provided callables. -- Added missed member functions, such as assignment operators and swap function, to the ``concurrent_queue`` and ``concurrent_bounded_queue`` containers. - ## :rotating_light: Known Limitations -- A static assert will cause compilation failures in oneTBB headers when compiling with clang 12.0.0 or newer if using the LLVM standard library with ``-ffreestanding`` and C++11/14 compiler options. -- An application using Parallel STL algorithms in libstdc++ versions 9 and 10 may fail to compile due to incompatible interface changes between earlier versions of Threading Building Blocks (TBB) and oneAPI Threading Building Blocks (oneTBB). Disable support for Parallel STL algorithms by defining ``PSTL_USE_PARALLEL_POLICIES`` (in libstdc++ 9) or ``_GLIBCXX_USE_TBB_PAR_BACKEND`` (in libstdc++ 10) macro to zero before inclusion of the first standard header file in each translation unit. -- On Linux* OS, if oneAPI Threading Building Blocks (oneTBB) or Threading Building Blocks (TBB) are installed in a system folder like ``/usr/lib64``, the application may fail to link due to the order in which the linker searches for libraries. Use the ``-L`` linker option to specify the correct location of oneTBB library. This issue does not affect the program execution. 
-- The ``oneapi::tbb::info`` namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc* version lower than 2.5. -- Using a hwloc* version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows* OS. See https://github.com/open-mpi/hwloc/issues/477 for details. -- The NUMA* topology may be detected incorrectly on Windows* OS machines where the number of NUMA* node threads exceeds the size of 1 processor group. -- On Windows* OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying ``/wd4324`` to the compiler command line. -- oneTBB does not support ``fork()``, to work-around the issue, consider using task_scheduler_handle to join oneTBB worker threads before using fork(). -- C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293). +- The ``oneapi::tbb::info`` namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc version lower than 2.5. +- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows OS. See https://github.com/open-mpi/hwloc/issues/477 for details. +- The NUMA topology may be detected incorrectly on Windows* OS machines where the number of NUMA node threads exceeds the size of 1 processor group. 
+- On Windows OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying /wd4324 to the compiler command line. +- C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293). +- When CPU resource coordination is enabled, tasks from a lower-priority ``task_arena`` might be executed before tasks from a higher-priority ``task_arena``. + +> **_NOTE:_** To see known limitations that impact all versions of oneTBB, refer to [oneTBB Documentation](https://oneapi-src.github.io/oneTBB/main/intro/limitations.html). + ## :hammer: Fixed Issues -- Fixed the hang in the reserve method of concurrent unordered containers ([GitHub* #1056](http://github.com/oneapi-src/oneTBB/issues/1056)). -- Fixed the C++20 three-way comparison feature detection ([GitHub* #1093](http://github.com/oneapi-src/oneTBB/issues/1093)). -- Fixed oneTBB integration with CMake* in the Conda* environment. +- Fixed ``parallel_for_each`` algorithm behavior for iterators defining ``iterator_concept`` trait instead of ``iterator_category``. +- Fixed the redefinition issue for ``std::min`` and ``std::max`` on Windows* OS ([GitHub* #832](https://github.com/oneapi-src/oneTBB/issues/832)). +- Fixed the incorrect binary search order in ``TBBConfig.cmake``. +- Enabled the oneTBB library search using the pkg-config tool in Conda packages. + +## :octocat: Open-source Contributions Integrated +- Fixed the compiler warning for missing virtual destructor. Contributed by Elias Engelbert Plank (https://github.com/oneapi-src/oneTBB/pull/1215). 
diff --git a/third-party/tbb/SECURITY.md b/third-party/tbb/SECURITY.md index c4a49dd5..4926041f 100644 --- a/third-party/tbb/SECURITY.md +++ b/third-party/tbb/SECURITY.md @@ -1,7 +1,66 @@ # Security Policy -Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, -impact, severity and mitigation. +As an open-source project, we understand the importance of and responsibility +for security. This Security Policy outlines our guidelines and procedures to +ensure the highest level of security and trust for oneTBB users. -## Reporting a Vulnerability -Please report any security vulnerabilities in this project -[utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). +## Supported Versions +Security vulnerabilities are fixed in the [latest version][1] +and delivered as a patch release. We don't guarantee security fixes to be +back-ported to older oneTBB versions. + +## Report a Vulnerability +We are very grateful to the security researchers and users that report back +security vulnerabilities. We investigate every report thoroughly. +We strongly encourage you to report security vulnerabilities to us privately, +before disclosing them on public forums or opening a public GitHub* issue. + +Report a vulnerability to us in one of two ways: +* Open a draft **[GitHub* Security Advisory][2]** +* Send an e-mail to: **security@uxlfoundation.org**. +Along with the report, provide the following info: + * A descriptive title. + * Your name and affiliation (if any). + * A description of the technical details of the vulnerabilities. + * A minimal example of the vulnerability so we can reproduce your findings. + * An explanation of who can exploit this vulnerability, and what they gain + doing so. + * Whether this vulnerability is public or known to third parties. If it is, + provide details. + +### When Should I Report a Vulnerability? 
+* You think you discovered a potential security vulnerability in oneTBB. +* You are unsure how the potential vulnerability affects oneTBB. +* You think you discovered a vulnerability in another project or 3rd party +component on which oneTBB depends. If the issue is not fixed in the 3rd party +component, try to report directly there first. + +### When Should I NOT Report a Vulnerability? +* You got an automated scan hit and are unable to provide details. +* You need help using oneTBB for security. +* You need help applying security-related updates. +* Your issue is not security-related. + +## Security Reports Review Process +We aim to respond quickly to your inquiry and coordinate a fix and +disclosure with you. All confirmed security vulnerabilities will be addressed +according to severity level and impact on oneTBB. Normally, security issues +are fixed in the next planned release. + +## Disclosure Policy +We will publish security advisories using the +[**GitHub Security Advisories feature**][3] +to keep our community well-informed, and will credit you for your findings +unless you prefer to stay anonymous. We request that you refrain from +exploiting the vulnerability or making it public before the official disclosure. + +We will disclose the vulnerabilities and bugs as soon as possible once +mitigation is implemented and available. + +## Feedback on This Policy +If you have any suggestions on how this Policy could be improved, submit +an issue or a pull request to this repository. **Do not** report +potential vulnerabilities or security flaws via a pull request. 
+ +[1]: https://github.com/oneapi-src/oneTBB/releases/latest +[2]: https://github.com/oneapi-src/oneTBB/security/advisories/new +[3]: https://github.com/oneapi-src/oneTBB/security/advisories diff --git a/third-party/tbb/SUPPORT.md b/third-party/tbb/SUPPORT.md new file mode 100644 index 00000000..47bb60a5 --- /dev/null +++ b/third-party/tbb/SUPPORT.md @@ -0,0 +1,35 @@ + + +# oneTBB Support + +We are committed to providing support and assistance to help you make the most out of oneTBB. +Use the following methods if you face any challenges. + +## Issues + +If you have a problem, check out the [GitHub Issues](https://github.com/oneapi-src/oneTBB/issues) to see if the issue you want to address is already reported. +You may find users that have encountered the same bug or have similar ideas for changes or updates. + +You can use issues to report a problem, make a feature request, or add comments on an existing issue. + +## Discussions + +Visit the [GitHub Discussions](https://github.com/oneapi-src/oneTBB/discussions) to engage with the community, ask questions, or help others. + +## Email + +Reach out to us privately via [email](mailto:inteltbbdevelopers@intel.com). \ No newline at end of file diff --git a/third-party/tbb/WASM_Support.md b/third-party/tbb/WASM_Support.md index 67925ee4..8c2f6c1a 100644 --- a/third-party/tbb/WASM_Support.md +++ b/third-party/tbb/WASM_Support.md @@ -16,16 +16,45 @@ # WASM Support +oneTBB extends its capabilities by offering robust support for ``WASM``. + ``WASM`` stands for WebAssembly, a low-level binary format for executing code in web browsers. -It is designed to be a portable target for compilers and to be efficient to parse and execute. +It is designed to be a portable target for compilers and efficient to parse and execute. + +Using oneTBB with WASM, you can take full advantage of parallelism and concurrency while working on web-based applications, interactive websites, and a variety of other WASM-compatible platforms. 
+ +oneTBB offers WASM support through the integration with [Emscripten*](https://emscripten.org/docs/introducing_emscripten/index.html), a powerful toolchain for compiling C and C++ code into WASM-compatible runtimes. + +## Build + +**Prerequisites:** Download and install Emscripten*. See the [instructions](https://emscripten.org/docs/getting_started/downloads.html). + +To build the system, run: + +``` +mkdir build && cd build +emcmake cmake .. -DCMAKE_CXX_COMPILER=em++ -DCMAKE_C_COMPILER=emcc -DTBB_STRICT=OFF -DCMAKE_CXX_FLAGS=-Wno-unused-command-line-argument -DTBB_DISABLE_HWLOC_AUTOMATIC_SEARCH=ON -DBUILD_SHARED_LIBS=ON -DTBB_EXAMPLES=ON -DTBB_TEST=ON +``` +To compile oneTBB without ``pthreads``, set the flag ``-DEMSCRIPTEN_WITHOUT_PTHREAD=true`` in the command above. By default, oneTBB uses the ``pthreads``. +``` +cmake --build . +cmake --install . +``` +Where: + +* ``emcmake`` - a tool that sets up the environment for Emscripten*. +* ``-DCMAKE_CXX_COMPILER=em++`` - specifies the C++ compiler as Emscripten* C++ compiler. +* ``-DCMAKE_C_COMPILER=emcc`` - specifies the C compiler as Emscripten* C compiler. + -WebAssembly aims to provide a fast, efficient, and safe way to run code in web browsers without needing plugins or other software. Code written in a variety of programming languages, including C, C++, Rust and others, can be compiled into WebAssembly format for use in web pages. This allows you to write high-performance applications that run directly in the browser. +> **_NOTE:_** See [CMake documentation](https://github.com/oneapi-src/oneTBB/blob/master/cmake/README.md) to learn about other options. -We currently have an [under development branch that provides you with WASM support](https://github.com/oneapi-src/oneTBB/tree/tbb_wasm). -By using WASM, you can: -* Create highly performant and scalable applications that can meet the demands of modern web-based systems. 
-* Take advantage of oneTBB features to optimize the performance of your web-based applications. +## Run Test +To run tests, use: +``` +ctest +``` diff --git a/third-party/tbb/WORKSPACE.bazel b/third-party/tbb/WORKSPACE.bazel index 6431b29b..59ba39f7 100644 --- a/third-party/tbb/WORKSPACE.bazel +++ b/third-party/tbb/WORKSPACE.bazel @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Intel Corporation +# Copyright (c) 2021-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,4 +16,4 @@ # use Bazel internally. The Bazel build can have security risks or # optimization gaps. -workspace(name = "oneTBB") +# WORKSPACE marker file needed by Bazel diff --git a/third-party/tbb/cmake/README.md b/third-party/tbb/cmake/README.md index ff37ad8e..aa811b0f 100644 --- a/third-party/tbb/cmake/README.md +++ b/third-party/tbb/cmake/README.md @@ -14,10 +14,12 @@ TBBMALLOC_BUILD:BOOL - Enable Intel(R) oneAPI Threading Building Blocks (oneTBB) TBBMALLOC_PROXY_BUILD:BOOL - Enable Intel(R) oneAPI Threading Building Blocks (oneTBB) memory allocator proxy build (requires TBBMALLOC_BUILD. 
ON by default) TBB4PY_BUILD:BOOL - Enable Intel(R) oneAPI Threading Building Blocks (oneTBB) Python module build (OFF by default) TBB_CPF:BOOL - Enable preview features of the library (OFF by default) +TBB_INSTALL:BOOL - Enable installation (ON by default) TBB_INSTALL_VARS:BOOL - Enable auto-generated vars installation(packages generated by `cpack` and `make install` will also include the vars script)(OFF by default) TBB_VALGRIND_MEMCHECK:BOOL - Enable scan for memory leaks using Valgrind (OFF by default) TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH - Disable HWLOC automatic search by pkg-config tool (OFF by default) TBB_ENABLE_IPO - Enable Interprocedural Optimization (IPO) during the compilation (ON by default) +TBB_BUILD_APPLE_FRAMEWORKS - Enable the Apple* frameworks instead of dylibs, only available on the Apple platform. (OFF by default) ``` ## Configure, Build, and Test diff --git a/third-party/tbb/cmake/compilers/Clang.cmake b/third-party/tbb/cmake/compilers/Clang.cmake index 7ce4d46d..f56b5fba 100644 --- a/third-party/tbb/cmake/compilers/Clang.cmake +++ b/third-party/tbb/cmake/compilers/Clang.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,7 +16,9 @@ if (EMSCRIPTEN) set(TBB_EMSCRIPTEN 1) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fexceptions) set(TBB_TEST_LINK_FLAGS ${TBB_COMMON_LINK_FLAGS} -fexceptions -sINITIAL_MEMORY=65536000 -sALLOW_MEMORY_GROWTH=1 -sEXIT_RUNTIME=1) - set_property(TARGET Threads::Threads PROPERTY INTERFACE_LINK_LIBRARIES "-pthread") + if (NOT EMSCRIPTEN_WITHOUT_PTHREAD) + set_property(TARGET Threads::Threads PROPERTY INTERFACE_LINK_LIBRARIES "-pthread") + endif() endif() if (MINGW) @@ -52,7 +54,7 @@ if (NOT TBB_STRICT AND COMMAND tbb_remove_compile_flag) endif() # Enable Intel(R) Transactional Synchronization Extensions (-mrtm) and WAITPKG instructions support (-mwaitpkg) on relevant processors -if (CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|amd64|i.86|x86)") +if (CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|amd64|i.86|x86)" AND NOT EMSCRIPTEN) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -mrtm $<$>:-mwaitpkg>) endif() @@ -66,7 +68,9 @@ endif() set(TBB_COMMON_LINK_LIBS ${CMAKE_DL_LIBS}) -set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} $<$>:-D_FORTIFY_SOURCE=2>) +if (NOT CMAKE_CXX_FLAGS MATCHES "_FORTIFY_SOURCE") + set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} $<$>:-D_FORTIFY_SOURCE=2>) +endif () if (MINGW) list(APPEND TBB_COMMON_COMPILE_FLAGS -U__STRICT_ANSI__) diff --git a/third-party/tbb/cmake/compilers/GNU.cmake b/third-party/tbb/cmake/compilers/GNU.cmake index 08c7f2e5..6fd8d980 100644 --- a/third-party/tbb/cmake/compilers/GNU.cmake +++ b/third-party/tbb/cmake/compilers/GNU.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -71,12 +71,13 @@ endif () set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fno-strict-overflow -fno-delete-null-pointer-checks -fwrapv) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -Wformat -Wformat-security -Werror=format-security -fstack-protector-strong ) -# -z switch is not supported on MacOS -if (NOT APPLE) +# -z switch is not supported on MacOS and MinGW +if (NOT APPLE AND NOT MINGW) set(TBB_LIB_LINK_FLAGS ${TBB_LIB_LINK_FLAGS} -Wl,-z,relro,-z,now,-z,noexecstack) endif() -set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} $<$>:-D_FORTIFY_SOURCE=2> ) - +if (NOT CMAKE_CXX_FLAGS MATCHES "_FORTIFY_SOURCE") + set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} $<$>:-D_FORTIFY_SOURCE=2> ) +endif () # TBB malloc settings set(TBBMALLOC_LIB_COMPILE_FLAGS -fno-rtti -fno-exceptions) diff --git a/third-party/tbb/cmake/compilers/Intel.cmake b/third-party/tbb/cmake/compilers/Intel.cmake index 582f9a84..531e078e 100644 --- a/third-party/tbb/cmake/compilers/Intel.cmake +++ b/third-party/tbb/cmake/compilers/Intel.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -21,7 +21,11 @@ if (MSVC) elseif (APPLE) include(${CMAKE_CURRENT_LIST_DIR}/AppleClang.cmake) set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} -fstack-protector -Wformat -Wformat-security - $<$>:-fno-omit-frame-pointer -qno-opt-report-embed -D_FORTIFY_SOURCE=2>) + $<$>:-fno-omit-frame-pointer -qno-opt-report-embed>) + if (NOT CMAKE_CXX_FLAGS MATCHES "_FORTIFY_SOURCE") + set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} $<$>:-D_FORTIFY_SOURCE=2>) + endif () + set(TBB_OPENMP_FLAG -qopenmp) set(TBB_IPO_COMPILE_FLAGS $<$>:-ipo>) else() diff --git a/third-party/tbb/cmake/compilers/IntelLLVM.cmake b/third-party/tbb/cmake/compilers/IntelLLVM.cmake index 89d56ae6..a9ebb3e6 100644 --- a/third-party/tbb/cmake/compilers/IntelLLVM.cmake +++ b/third-party/tbb/cmake/compilers/IntelLLVM.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,10 +14,12 @@ if (WIN32) include(${CMAKE_CURRENT_LIST_DIR}/MSVC.cmake) + set(TBB_OPENMP_FLAG /Qopenmp) set(TBB_IPO_COMPILE_FLAGS $<$>:/Qipo>) set(TBB_IPO_LINK_FLAGS $<$>:/INCREMENTAL:NO>) else() include(${CMAKE_CURRENT_LIST_DIR}/Clang.cmake) set(TBB_IPO_COMPILE_FLAGS $<$>:-ipo>) + set(TBB_OPENMP_FLAG -qopenmp) endif() set(TBB_IPO_LINK_FLAGS ${TBB_IPO_LINK_FLAGS} ${TBB_IPO_COMPILE_FLAGS}) diff --git a/third-party/tbb/cmake/compilers/MSVC.cmake b/third-party/tbb/cmake/compilers/MSVC.cmake index 0e0dfd31..6568ec7e 100644 --- a/third-party/tbb/cmake/compilers/MSVC.cmake +++ b/third-party/tbb/cmake/compilers/MSVC.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -33,9 +33,9 @@ if (MSVC_VERSION LESS_EQUAL 1900) set(TBB_TEST_COMPILE_FLAGS ${TBB_TEST_COMPILE_FLAGS} /wd4503) endif() set(TBB_LIB_COMPILE_FLAGS -D_CRT_SECURE_NO_WARNINGS /GS) -set(TBB_COMMON_COMPILE_FLAGS /volatile:iso /FS /EHsc) +set(TBB_COMMON_COMPILE_FLAGS ${TBB_COMMON_COMPILE_FLAGS} /volatile:iso /FS /EHsc) -set(TBB_LIB_LINK_FLAGS ${TBB_LIB_LINK_FLAGS} /DYNAMICBASE /NXCOMPAT) +set(TBB_LIB_LINK_FLAGS ${TBB_LIB_LINK_FLAGS} /DEPENDENTLOADFLAG:0x2000 /DYNAMICBASE /NXCOMPAT) if (TBB_ARCH EQUAL 32) set(TBB_LIB_LINK_FLAGS ${TBB_LIB_LINK_FLAGS} /SAFESEH ) diff --git a/third-party/tbb/cmake/config_generation.cmake b/third-party/tbb/cmake/config_generation.cmake index 0cbdd745..e4ef7bce 100644 --- a/third-party/tbb/cmake/config_generation.cmake +++ b/third-party/tbb/cmake/config_generation.cmake @@ -92,6 +92,7 @@ set(_tbbbind_bin_version ${tbb_gen_cfg_TBBBIND_BINARY_VERSION}) NAMES \${_tbb_component}\${_bin_version}.dll PATHS \${_tbb_root} PATH_SUFFIXES \"redist/\${_tbb_intel_arch}/\${_tbb_subdir}\" \"bin\${_tbb_arch_suffix}/\${_tbb_subdir}\" \"bin\${_tbb_arch_suffix}/\" \"bin\" + NO_DEFAULT_PATH ) if (EXISTS \"\${_tbb_debug_lib}\") @@ -99,6 +100,7 @@ set(_tbbbind_bin_version ${tbb_gen_cfg_TBBBIND_BINARY_VERSION}) NAMES \${_tbb_component}\${_bin_version}_debug.dll PATHS \${_tbb_root} PATH_SUFFIXES \"redist/\${_tbb_intel_arch}/\${_tbb_subdir}\" \"bin\${_tbb_arch_suffix}/\${_tbb_subdir}\" \"bin\${_tbb_arch_suffix}/\" \"bin\" + NO_DEFAULT_PATH ) endif() ") diff --git a/third-party/tbb/cmake/hwloc_detection.cmake b/third-party/tbb/cmake/hwloc_detection.cmake index 47233b17..aaca5a59 100644 --- a/third-party/tbb/cmake/hwloc_detection.cmake +++ b/third-party/tbb/cmake/hwloc_detection.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -46,8 +46,6 @@ endforeach() unset(HWLOC_TARGET_NAME) if (NOT HWLOC_TARGET_EXPLICITLY_DEFINED AND - # No hwloc auto detection for cross compilation - NOT CMAKE_CROSSCOMPILING AND NOT TBB_DISABLE_HWLOC_AUTOMATIC_SEARCH ) find_package(PkgConfig QUIET) diff --git a/third-party/tbb/cmake/templates/TBBConfig.cmake.in b/third-party/tbb/cmake/templates/TBBConfig.cmake.in index 18ac68d3..3131e3dd 100644 --- a/third-party/tbb/cmake/templates/TBBConfig.cmake.in +++ b/third-party/tbb/cmake/templates/TBBConfig.cmake.in @@ -65,6 +65,7 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS}) NAMES @TBB_LIB_PREFIX@${_tbb_component}${_bin_version}.@TBB_LIB_EXT@ PATHS ${_tbb_root} PATH_SUFFIXES "@TBB_LIB_REL_PATH@/${_tbb_intel_arch}/${_tbb_subdir}" "@TBB_LIB_REL_PATH@${_tbb_arch_suffix}/${_tbb_subdir}" "@TBB_LIB_REL_PATH@${_tbb_arch_suffix}" "@TBB_LIB_REL_PATH@" + NO_DEFAULT_PATH ) if (NOT TBB_FIND_RELEASE_ONLY) @@ -72,6 +73,7 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS}) NAMES @TBB_LIB_PREFIX@${_tbb_component}${_bin_version}_debug.@TBB_LIB_EXT@ PATHS ${_tbb_root} PATH_SUFFIXES "@TBB_LIB_REL_PATH@/${_tbb_intel_arch}/${_tbb_subdir}" "@TBB_LIB_REL_PATH@${_tbb_arch_suffix}/${_tbb_subdir}" "@TBB_LIB_REL_PATH@${_tbb_arch_suffix}" "@TBB_LIB_REL_PATH@" + NO_DEFAULT_PATH ) endif() diff --git a/third-party/tbb/cmake/utils.cmake b/third-party/tbb/cmake/utils.cmake index 982a633f..21101989 100644 --- a/third-party/tbb/cmake/utils.cmake +++ b/third-party/tbb/cmake/utils.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,31 +23,37 @@ macro(tbb_remove_compile_flag flag) endmacro() macro(tbb_install_target target) - install(TARGETS ${target} - EXPORT TBBTargets - LIBRARY - DESTINATION ${CMAKE_INSTALL_LIBDIR} - NAMELINK_SKIP - COMPONENT runtime - RUNTIME - DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT runtime - ARCHIVE - DESTINATION ${CMAKE_INSTALL_LIBDIR} - COMPONENT devel) - - if (BUILD_SHARED_LIBS) + if (TBB_INSTALL) install(TARGETS ${target} + EXPORT TBBTargets LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - NAMELINK_ONLY - COMPONENT devel) - endif() - if (MSVC AND BUILD_SHARED_LIBS) - install(FILES $ - DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT devel - OPTIONAL) + NAMELINK_SKIP + COMPONENT runtime + RUNTIME + DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT runtime + ARCHIVE + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT devel + FRAMEWORK + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT runtime + OPTIONAL) + + if (BUILD_SHARED_LIBS) + install(TARGETS ${target} + LIBRARY + DESTINATION ${CMAKE_INSTALL_LIBDIR} + NAMELINK_ONLY + COMPONENT devel) + endif() + if (MSVC AND BUILD_SHARED_LIBS) + install(FILES $ + DESTINATION ${CMAKE_INSTALL_BINDIR} + COMPONENT devel + OPTIONAL) + endif() endif() endmacro() diff --git a/third-party/tbb/cmake/vars_utils.cmake b/third-party/tbb/cmake/vars_utils.cmake index 989fea26..54a9fda1 100644 --- a/third-party/tbb/cmake/vars_utils.cmake +++ b/third-party/tbb/cmake/vars_utils.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,12 +26,20 @@ get_filename_component(TBB_VARS_TEMPLATE_NAME ${PROJECT_SOURCE_DIR}/integration/ string(REPLACE ".in" "" TBB_VARS_NAME ${TBB_VARS_TEMPLATE_NAME}) macro(tbb_gen_vars target) + if (NOT TBB_BUILD_APPLE_FRAMEWORKS) + set(BIN_PATH $) + else() + # For Apple* frameworks, the binaries are placed in a framework bundle. + # When using an Apple* framework, you refer to the bundle, not the binary inside, so we take the bundle's path and go up one level. + # This path will then be used to generate the vars file, and the contents of the vars file will use the bundle's parent directory. + set(BIN_PATH $/..) + endif() if (${CMAKE_PROJECT_NAME} STREQUAL ${PROJECT_NAME}) add_custom_command(TARGET ${target} POST_BUILD COMMAND ${CMAKE_COMMAND} -DBINARY_DIR=${CMAKE_BINARY_DIR} -DSOURCE_DIR=${PROJECT_SOURCE_DIR} - -DBIN_PATH=$ + -DBIN_PATH=${BIN_PATH} -DVARS_TEMPLATE=${TBB_VARS_TEMPLATE} -DVARS_NAME=${TBB_VARS_NAME} -DTBB_INSTALL_VARS=${TBB_INSTALL_VARS} diff --git a/third-party/tbb/doc/conf.py b/third-party/tbb/doc/conf.py index 87593ebf..19da0a4c 100644 --- a/third-party/tbb/doc/conf.py +++ b/third-party/tbb/doc/conf.py @@ -137,10 +137,14 @@ 'use_issues_button': True, 'use_edit_page_button': True, 'repository_branch': 'master', - 'extra_footer': '

Cookies

' } +if BUILD_TYPE != 'oneapi' and BUILD_TYPE != 'dita': + html_theme_options = { + "extra_footer": "
© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others. No license (express or implied, by estoppel or otherwise) to any intellectual property rights is granted by this document, with the sole exception that code included in this document is licensed subject to the Zero-Clause BSD open source license (OBSD), http://opensource.org/licenses/0BSD.

oneTBB is licensed under Apache License Version 2.0. Refer to the LICENSE file for the full license text and copyright notice.
" + } + # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". diff --git a/third-party/tbb/doc/main/intro/Benefits.rst b/third-party/tbb/doc/main/intro/Benefits.rst index b66ea5d1..5058cc71 100644 --- a/third-party/tbb/doc/main/intro/Benefits.rst +++ b/third-party/tbb/doc/main/intro/Benefits.rst @@ -20,7 +20,7 @@ it with any compiler supporting ISO C++. The library differs from typical threading packages in the following ways: -- **oneTBB enables you to specify logical paralleism instead of +- **oneTBB enables you to specify logical parallelism instead of threads**. Most threading packages require you to specify threads. Programming directly in terms of threads can be tedious and lead to inefficient programs, because threads are low-level, heavy constructs diff --git a/third-party/tbb/doc/main/reference/reference.rst b/third-party/tbb/doc/main/reference/reference.rst index ec9fb1e1..833a50ee 100644 --- a/third-party/tbb/doc/main/reference/reference.rst +++ b/third-party/tbb/doc/main/reference/reference.rst @@ -19,6 +19,7 @@ It also describes features that are not included in the oneTBB specification. parallel_for_each_semantics parallel_sort_ranges_extension scalable_memory_pools/malloc_replacement_log + rvalue_reduce Preview features **************** diff --git a/third-party/tbb/doc/main/reference/rvalue_reduce.rst b/third-party/tbb/doc/main/reference/rvalue_reduce.rst new file mode 100644 index 00000000..53880952 --- /dev/null +++ b/third-party/tbb/doc/main/reference/rvalue_reduce.rst @@ -0,0 +1,89 @@ +.. _rvalue_reduce: + +Parallel Reduction for rvalues +============================== + +.. 
contents:: + :local: + :depth: 1 + +Description +*********** + +|full_name| implementation extends the `ParallelReduceFunc `_ and +`ParallelReduceReduction `_ +to optimize operating with ``rvalues`` using functional form of ``tbb::parallel_reduce`` and ``tbb::parallel_deterministic_reduce`` algorithms. + +API +*** + +Header +------ + +.. code:: cpp + + #include + +ParallelReduceFunc Requirements: Pseudo-Signature, Semantics +------------------------------------------------------------ + +.. cpp:function:: Value Func::operator()(const Range& range, Value&& x) const + +or + +.. cpp:function:: Value Func::operator()(const Range& range, const Value& x) const + + Accumulates the result for a subrange, starting with initial value ``x``. The ``Range`` type must meet the `Range requirements _`. + The ``Value`` type must be the same as a corresponding template parameter for the `parallel_reduce algorithm `_. + + If both ``rvalue`` and ``lvalue`` forms are provided, the ``rvalue`` is preferred. + +ParallelReduceReduction Requirements: Pseudo-Signature, Semantics +----------------------------------------------------------------- + +.. cpp:function:: Value Reduction::operator()(Value&& x, Value&& y) const + +or + +.. cpp:function:: Value Reduction::operator()(const Value& x, const Value& y) const + + Combines the ``x`` and ``y`` results. The ``Value`` type must be the same as a corresponding template parameter for the `parallel_reduce algorithm `_. + + If both ``rvalue`` and ``lvalue`` forms are provided, the ``rvalue`` is preferred. + +Example +******* + +.. 
code:: cpp + // C++17 + #include + #include + #include + #include + + int main() { + std::vector> sets = ...; + + oneapi::tbb::parallel_reduce(oneapi::tbb::blocked_range(0, sets.size()), + std::set{}, // identity element - empty set + [&](const oneapi::tbb::blocked_range& range, std::set&& value) { + for (size_t i = range.begin(); i < range.end(); ++i) { + // Having value as a non-const rvalue reference allows to efficiently + // transfer nodes from sets[i] without copying/moving the data + value.merge(std::move(sets[i])); + } + return value; + }, + [&](std::set&& x, std::set&& y) { + x.merge(std::move(y)); + return x; + } + ); + } + +.. rubric:: See also + +* `oneapi::tbb::parallel_reduce specification `_ +* `oneapi::tbb::parallel_deterministic_reduce specification `_ +* `ParallelReduceFunc specification `_ +* `ParallelReduceReduction specification `_ diff --git a/third-party/tbb/doc/main/tbb_userguide/Flow_Graph_Reservation.rst b/third-party/tbb/doc/main/tbb_userguide/Flow_Graph_Reservation.rst index 8487c449..44fc2f0a 100644 --- a/third-party/tbb/doc/main/tbb_userguide/Flow_Graph_Reservation.rst +++ b/third-party/tbb/doc/main/tbb_userguide/Flow_Graph_Reservation.rst @@ -63,7 +63,7 @@ messages and do not support ``try_get()`` or ``try_reserve()``. broadcast_node bn(g); buffer_node buf1(g); buffer_node buf2(g); - typedef join_node reserving> join_type; + typedef join_node, reserving> join_type; join_type jn(g); buffer_node buf_out(g); join_type::output_type tuple_out; @@ -71,9 +71,9 @@ messages and do not support ``try_get()`` or ``try_reserve()``. 
// join_node predecessors are both reservable buffer_nodes - make_edge(buf1,input_port<0>jn)); - make_edge(bn,input_port<0>jn)); // attach a broadcast_node - make_edge(buf2,input_port<1>jn)); + make_edge(buf1,input_port<0>(jn)); + make_edge(bn,input_port<0>(jn)); // attach a broadcast_node + make_edge(buf2,input_port<1>(jn)); make_edge(jn, buf_out); bn.try_put(2); buf1.try_put(3); @@ -81,7 +81,7 @@ messages and do not support ``try_get()`` or ``try_reserve()``. buf2.try_put(7); g.wait_for_all(); while (buf_out.try_get(tuple_out)) { - printf("join_node output == (%d,%d)\n",get<0>tuple_out), get<1>tuple_out) ); + printf("join_node output == (%d,%d)\n",get<0>(tuple_out), get<1>(tuple_out) ); } if(buf1.try_get(icnt)) printf("buf1 had %d\n", icnt); else printf("buf1 was empty\n"); diff --git a/third-party/tbb/doc/main/tbb_userguide/Migration_Guide/Mixing_Two_Runtimes.rst b/third-party/tbb/doc/main/tbb_userguide/Migration_Guide/Mixing_Two_Runtimes.rst index 57582aac..8d467fb6 100644 --- a/third-party/tbb/doc/main/tbb_userguide/Migration_Guide/Mixing_Two_Runtimes.rst +++ b/third-party/tbb/doc/main/tbb_userguide/Migration_Guide/Mixing_Two_Runtimes.rst @@ -46,3 +46,4 @@ TBB possible output: TBB: RML private TBB: Tools support disabled +.. note:: The ``tbbmalloc`` library in oneTBB is fully binary compatible with TBB. diff --git a/third-party/tbb/doc/main/tbb_userguide/concurrent_hash_map.rst b/third-party/tbb/doc/main/tbb_userguide/concurrent_hash_map.rst index cd8482ff..8d9ba3a1 100644 --- a/third-party/tbb/doc/main/tbb_userguide/concurrent_hash_map.rst +++ b/third-party/tbb/doc/main/tbb_userguide/concurrent_hash_map.rst @@ -30,14 +30,14 @@ string occurs in the array ``Data``. // Structure that defines hashing and comparison operations for user's type. struct MyHashCompare { - static size_t hash( const string& x ) { + size_t hash( const string& x ) const { size_t h = 0; for( const char* s = x.c_str(); *s; ++s ) h = (h*17)^*s; return h; } //! 
True if strings are equal - static bool equal( const string& x, const string& y ) { + bool equal( const string& x, const string& y ) const { return x==y; } }; @@ -128,4 +128,4 @@ any other extant accesses on ``key``. .. toctree:: :maxdepth: 4 - ../tbb_userguide/More_on_HashCompare \ No newline at end of file + ../tbb_userguide/More_on_HashCompare diff --git a/third-party/tbb/doc/make.bat b/third-party/tbb/doc/make.bat index 557ecc5b..14d399a5 100644 --- a/third-party/tbb/doc/make.bat +++ b/third-party/tbb/doc/make.bat @@ -25,7 +25,7 @@ REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) -set SOURCEDIR=doc +set SOURCEDIR=. set BUILDDIR=build if "%1" == "" goto help diff --git a/third-party/tbb/examples/CMakeLists.txt b/third-party/tbb/examples/CMakeLists.txt index 979998c6..16f1c455 100644 --- a/third-party/tbb/examples/CMakeLists.txt +++ b/third-party/tbb/examples/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(tbb_examples CXX) @@ -66,6 +66,7 @@ tbb_add_example(parallel_for_each parallel_preorder) tbb_add_example(parallel_pipeline square) tbb_add_example(parallel_reduce convex_hull) +tbb_add_example(parallel_reduce pi) tbb_add_example(parallel_reduce primes) tbb_add_example(task_arena fractal) diff --git a/third-party/tbb/examples/README.md b/third-party/tbb/examples/README.md index 318d2d93..037ca4d4 100644 --- a/third-party/tbb/examples/README.md +++ b/third-party/tbb/examples/README.md @@ -19,6 +19,7 @@ This directory contains example usages of oneAPI Threading Building Blocks. | parallel_for_each/parallel_preorder | Parallel preorder traversal of a graph. | parallel_pipeline/square | Another string transformation example that squares numbers read from a file. | parallel_reduce/convex_hull | Parallel version of convex hull algorithm (quick hull). 
+| parallel_reduce/pi | Parallel version of calculating π by numerical integration. | parallel_reduce/primes | Parallel version of the Sieve of Eratosthenes. | task_arena/fractal |The example calculates two classical Mandelbrot fractals with different concurrency limits. | task_group/sudoku | Compute all solutions for a Sudoku board. diff --git a/third-party/tbb/examples/common/gui/CMakeLists.txt b/third-party/tbb/examples/common/gui/CMakeLists.txt index 8bee0a83..ea8b0060 100644 --- a/third-party/tbb/examples/common/gui/CMakeLists.txt +++ b/third-party/tbb/examples/common/gui/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) set(EXAMPLES_UI_MODE "con" CACHE STRING "EXAMPLES_UI_MODE") diff --git a/third-party/tbb/examples/concurrent_hash_map/count_strings/CMakeLists.txt b/third-party/tbb/examples/concurrent_hash_map/count_strings/CMakeLists.txt index 14d25fa7..77efd2f6 100644 --- a/third-party/tbb/examples/concurrent_hash_map/count_strings/CMakeLists.txt +++ b/third-party/tbb/examples/concurrent_hash_map/count_strings/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(count_strings CXX) diff --git a/third-party/tbb/examples/concurrent_hash_map/count_strings/count_strings.cpp b/third-party/tbb/examples/concurrent_hash_map/count_strings/count_strings.cpp index 2b563cd5..0a230846 100644 --- a/third-party/tbb/examples/concurrent_hash_map/count_strings/count_strings.cpp +++ b/third-party/tbb/examples/concurrent_hash_map/count_strings/count_strings.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -42,7 +42,7 @@ class hash> { (sizeof(std::size_t) == sizeof(unsigned)) ? 2654435769U : 11400714819323198485ULL); std::hash char_hash; -}; // strunt hash +}; // struct hash } // namespace std diff --git a/third-party/tbb/examples/concurrent_priority_queue/shortpath/CMakeLists.txt b/third-party/tbb/examples/concurrent_priority_queue/shortpath/CMakeLists.txt index 8a6d78a0..624a5928 100644 --- a/third-party/tbb/examples/concurrent_priority_queue/shortpath/CMakeLists.txt +++ b/third-party/tbb/examples/concurrent_priority_queue/shortpath/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(shortpath CXX) diff --git a/third-party/tbb/examples/getting_started/sub_string_finder/CMakeLists.txt b/third-party/tbb/examples/getting_started/sub_string_finder/CMakeLists.txt index cf4e6a1b..91792dde 100644 --- a/third-party/tbb/examples/getting_started/sub_string_finder/CMakeLists.txt +++ b/third-party/tbb/examples/getting_started/sub_string_finder/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(sub_string_finder_simple CXX) project(sub_string_finder_extended CXX) diff --git a/third-party/tbb/examples/graph/binpack/CMakeLists.txt b/third-party/tbb/examples/graph/binpack/CMakeLists.txt index 5fc979a5..3d3b7921 100644 --- a/third-party/tbb/examples/graph/binpack/CMakeLists.txt +++ b/third-party/tbb/examples/graph/binpack/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(binpack CXX) diff --git a/third-party/tbb/examples/graph/cholesky/CMakeLists.txt b/third-party/tbb/examples/graph/cholesky/CMakeLists.txt index eeb2649a..2e8273ae 100644 --- a/third-party/tbb/examples/graph/cholesky/CMakeLists.txt +++ b/third-party/tbb/examples/graph/cholesky/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(cholesky CXX) diff --git a/third-party/tbb/examples/graph/dining_philosophers/CMakeLists.txt b/third-party/tbb/examples/graph/dining_philosophers/CMakeLists.txt index 95f7a483..d46af59b 100644 --- a/third-party/tbb/examples/graph/dining_philosophers/CMakeLists.txt +++ b/third-party/tbb/examples/graph/dining_philosophers/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(dining_philosophers CXX) diff --git a/third-party/tbb/examples/graph/fgbzip2/CMakeLists.txt b/third-party/tbb/examples/graph/fgbzip2/CMakeLists.txt index a2034edb..7a9142a5 100644 --- a/third-party/tbb/examples/graph/fgbzip2/CMakeLists.txt +++ b/third-party/tbb/examples/graph/fgbzip2/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(fgbzip2 CXX) diff --git a/third-party/tbb/examples/graph/logic_sim/CMakeLists.txt b/third-party/tbb/examples/graph/logic_sim/CMakeLists.txt index b33f9156..99e1cc8f 100644 --- a/third-party/tbb/examples/graph/logic_sim/CMakeLists.txt +++ b/third-party/tbb/examples/graph/logic_sim/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(logic_sim CXX) diff --git a/third-party/tbb/examples/graph/som/CMakeLists.txt b/third-party/tbb/examples/graph/som/CMakeLists.txt index 6e759331..c2dd1a80 100644 --- a/third-party/tbb/examples/graph/som/CMakeLists.txt +++ b/third-party/tbb/examples/graph/som/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) include(../../common/cmake/common.cmake) project(som CXX) diff --git a/third-party/tbb/examples/migration/recursive_fibonacci/CMakeLists.txt b/third-party/tbb/examples/migration/recursive_fibonacci/CMakeLists.txt index 5032da23..57e027cf 100644 --- a/third-party/tbb/examples/migration/recursive_fibonacci/CMakeLists.txt +++ b/third-party/tbb/examples/migration/recursive_fibonacci/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(recursive_fibonacci CXX) @@ -33,7 +33,7 @@ set(EXECUTABLE "$") # `N` - specifies the fibonacci number which would be calculated. # `C` - cutoff that will be used to stop recursive split. # `I` - number of iteration to measure benchmark time. -set(ARGS 30 16 20) +set(ARGS 30 16 20 1) set(PERF_ARGS 50 5 20) add_execution_target(run_recursive_fibonacci recursive_fibonacci ${EXECUTABLE} "${ARGS}") diff --git a/third-party/tbb/examples/migration/recursive_fibonacci/README.md b/third-party/tbb/examples/migration/recursive_fibonacci/README.md index bc66c5d8..1f0341c1 100644 --- a/third-party/tbb/examples/migration/recursive_fibonacci/README.md +++ b/third-party/tbb/examples/migration/recursive_fibonacci/README.md @@ -9,14 +9,15 @@ cmake --build . ## Running the sample ### Predefined make targets -* `make run_recursive_fibonacci` - executes the example with predefined parameters. +* `make run_recursive_fibonacci` - executes the example with predefined parameters (extended testing enabled). * `make perf_run_recursive_fibonacci` - executes the example with suggested parameters to measure the oneTBB performance. ### Application parameters Usage: ``` -recursive_fibonacci N C I +recursive_fibonacci N C I T ``` * `N` - specifies the fibonacci number which would be calculated. 
* `C` - cutoff that will be used to stop recursive split. * `I` - number of iteration to measure benchmark time. +* `T` - enables extended testing (recycle task in a loop). diff --git a/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci.cpp b/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci.cpp index acf22a49..e4a7c12e 100644 --- a/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci.cpp +++ b/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci.cpp @@ -22,6 +22,7 @@ #include int cutoff; +bool testing_enabled; template std::pair measure(F&& f, @@ -48,6 +49,7 @@ int main(int argc, char* argv[]) { int numbers = argc > 1 ? strtol(argv[1], nullptr, 0) : 50; cutoff = argc > 2 ? strtol(argv[2], nullptr, 0) : 16; unsigned long ntrial = argc > 3 ? (unsigned long)strtoul(argv[3], nullptr, 0) : 20; + testing_enabled = argc > 4 ? (bool)strtol(argv[4], nullptr, 0) : false; auto res = measure(fibonacci_two_tasks, numbers, ntrial); std::cout << "Fibonacci two tasks impl N = " << res.first << " Avg time = " << res.second diff --git a/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_single_task.h b/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_single_task.h index 2467f862..dae8895b 100644 --- a/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_single_task.h +++ b/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_single_task.h @@ -24,6 +24,7 @@ #include extern int cutoff; +extern bool testing_enabled; long serial_fib_1(int n) { return n < 2 ? 
n : serial_fib_1(n - 1) + serial_fib_1(n - 2); @@ -38,39 +39,43 @@ struct single_fib_task : task_emulation::base_task { single_fib_task(int n, int* x) : n(n), x(x), s(state::compute) {} - void execute() override { + task_emulation::base_task* execute() override { + task_emulation::base_task* bypass = nullptr; switch (s) { case state::compute : { - compute_impl(); + bypass = compute_impl(); break; } case state::sum : { *x = x_l + x_r; + + if (testing_enabled) { + if (n == cutoff && num_recycles > 0) { + --num_recycles; + bypass = compute_impl(); + } + } + break; } } + return bypass; } - void compute_impl() { + task_emulation::base_task* compute_impl() { + task_emulation::base_task* bypass = nullptr; if (n < cutoff) { *x = serial_fib_1(n); } else { - auto bypass = this->allocate_child_and_increment(n - 2, &x_r); + bypass = this->allocate_child_and_increment(n - 2, &x_r); task_emulation::run_task(this->allocate_child_and_increment(n - 1, &x_l)); // Recycling this->s = state::sum; this->recycle_as_continuation(); - - // Bypass is not supported by task_emulation and next_task executed directly. - // However, the old-TBB bypass behavior can be achieved with - // `return task_group::defer()` (check Migration Guide). - // Consider submit another task if recursion call is not acceptable - // i.e. 
instead of Direct Body call - // submit task_emulation::run_task(this->allocate_child_and_increment(n - 2, &x_r)); - bypass->operator()(); } + return bypass; } @@ -79,6 +84,7 @@ struct single_fib_task : task_emulation::base_task { state s; int x_l{ 0 }, x_r{ 0 }; + int num_recycles{5}; }; int fibonacci_single_task(int n) { diff --git a/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h b/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h index 91236625..5d7fd022 100644 --- a/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h +++ b/third-party/tbb/examples/migration/recursive_fibonacci/fibonacci_two_tasks.h @@ -33,8 +33,9 @@ long serial_fib(int n) { struct fib_continuation : task_emulation::base_task { fib_continuation(int& s) : sum(s) {} - void execute() override { + task_emulation::base_task* execute() override { sum = x + y; + return nullptr; } int x{ 0 }, y{ 0 }; @@ -44,7 +45,8 @@ struct fib_continuation : task_emulation::base_task { struct fib_computation : task_emulation::base_task { fib_computation(int n, int* x) : n(n), x(x) {} - void execute() override { + task_emulation::base_task* execute() override { + task_emulation::base_task* bypass = nullptr; if (n < cutoff) { *x = serial_fib(n); } @@ -57,15 +59,9 @@ struct fib_computation : task_emulation::base_task { this->recycle_as_child_of(c); n = n - 2; x = &c.y; - - // Bypass is not supported by task_emulation and next_task executed directly. - // However, the old-TBB bypass behavior can be achieved with - // `return task_group::defer()` (check Migration Guide). - // Consider submit another task if recursion call is not acceptable - // i.e. 
instead of Recycling + Direct Body call - // submit task_emulation::run_task(c.create_child(n - 2, &c.y)); - this->operator()(); + bypass = this; } + return bypass; } int n; diff --git a/third-party/tbb/examples/migration/recursive_fibonacci/task_emulation_layer.h b/third-party/tbb/examples/migration/recursive_fibonacci/task_emulation_layer.h index 3a387127..7252d447 100644 --- a/third-party/tbb/examples/migration/recursive_fibonacci/task_emulation_layer.h +++ b/third-party/tbb/examples/migration/recursive_fibonacci/task_emulation_layer.h @@ -47,32 +47,45 @@ class base_task { public: base_task() = default; - base_task(const base_task& t) : m_parent(t.m_parent), m_child_counter(t.m_child_counter.load()) + base_task(const base_task& t) : m_type(t.m_type), m_parent(t.m_parent), m_child_counter(t.m_child_counter.load()) {} virtual ~base_task() = default; void operator() () const { - base_task* parent_snapshot = m_parent; - const_cast(this)->execute(); - if (m_parent && parent_snapshot == m_parent && m_child_counter == 0) { - if (m_parent->remove_reference() == 0) { + task_type type_snapshot = m_type; + + base_task* bypass = const_cast(this)->execute(); + + if (m_parent && m_type != task_type::recycled) { + if (m_parent->remove_child_reference() == 0) { m_parent->operator()(); - delete m_parent; } } - if (m_child_counter == 0 && m_type == task_type::allocated) { + if (m_type == task_type::allocated) { delete this; } + + if (bypass != nullptr) { + m_type = type_snapshot; + + // Bypass is not supported by task_emulation and next_task executed directly. + // However, the old-TBB bypass behavior can be achieved with + // `return task_group::defer()` (check Migration Guide). + // Consider submit another task if recursion call is not acceptable + // i.e. 
instead of Direct Body call + // submit task_emulation::run_task(); + bypass->operator()(); + } } - virtual void execute() = 0; + virtual base_task* execute() = 0; template C* allocate_continuation(std::uint64_t ref, Args&&... args) { C* continuation = new C{std::forward(args)...}; - continuation->m_type = task_type::continuation; + continuation->m_type = task_type::allocated; continuation->reset_parent(reset_parent()); continuation->m_child_counter = ref; return continuation; @@ -85,7 +98,7 @@ class base_task { template F create_child_and_increment(Args&&... args) { - add_reference(); + add_child_reference(); return create_child_impl(std::forward(args)...); } @@ -96,35 +109,36 @@ class base_task { template F* allocate_child_and_increment(Args&&... args) { - add_reference(); + add_child_reference(); return allocate_child_impl(std::forward(args)...); } template void recycle_as_child_of(C& c) { + m_type = task_type::recycled; reset_parent(&c); } void recycle_as_continuation() { - m_type = task_type::continuation; + m_type = task_type::recycled; } - void add_reference() { + void add_child_reference() { ++m_child_counter; } - std::uint64_t remove_reference() { + std::uint64_t remove_child_reference() { return --m_child_counter; } protected: enum class task_type { - created, + stack_based, allocated, - continuation + recycled }; - task_type m_type; + mutable task_type m_type; private: template @@ -136,7 +150,7 @@ class base_task { template F create_child_impl(Args&&... args) { F obj{std::forward(args)...}; - obj.m_type = task_type::created; + obj.m_type = task_type::stack_based; obj.reset_parent(this); return obj; } @@ -162,13 +176,14 @@ class base_task { class root_task : public base_task { public: root_task(tbb::task_group& tg) : m_tg(tg), m_callback(m_tg.defer([] { /* Create empty callback to preserve reference for wait. 
*/})) { - add_reference(); - m_type = base_task::task_type::continuation; + add_child_reference(); + m_type = base_task::task_type::allocated; } private: - void execute() override { + base_task* execute() override { m_tg.run(std::move(m_callback)); + return nullptr; } tbb::task_group& m_tg; @@ -178,7 +193,7 @@ class root_task : public base_task { template F create_root_task(tbb::task_group& tg, Args&&... args) { F obj{std::forward(args)...}; - obj.m_type = base_task::task_type::created; + obj.m_type = base_task::task_type::stack_based; obj.reset_parent(new root_task{tg}); return obj; } diff --git a/third-party/tbb/examples/parallel_for/game_of_life/CMakeLists.txt b/third-party/tbb/examples/parallel_for/game_of_life/CMakeLists.txt index 47f7ca7b..59634242 100644 --- a/third-party/tbb/examples/parallel_for/game_of_life/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_for/game_of_life/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(game_of_life CXX) diff --git a/third-party/tbb/examples/parallel_for/polygon_overlay/CMakeLists.txt b/third-party/tbb/examples/parallel_for/polygon_overlay/CMakeLists.txt index cb0475e2..a45aaa68 100644 --- a/third-party/tbb/examples/parallel_for/polygon_overlay/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_for/polygon_overlay/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(polygon_overlay CXX) diff --git a/third-party/tbb/examples/parallel_for/seismic/CMakeLists.txt b/third-party/tbb/examples/parallel_for/seismic/CMakeLists.txt index 9236176b..61675f19 100644 --- a/third-party/tbb/examples/parallel_for/seismic/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_for/seismic/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(seismic CXX) diff --git a/third-party/tbb/examples/parallel_for/tachyon/CMakeLists.txt b/third-party/tbb/examples/parallel_for/tachyon/CMakeLists.txt index 9dc0f83c..752fddef 100644 --- a/third-party/tbb/examples/parallel_for/tachyon/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_for/tachyon/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(tachyon CXX) @@ -39,7 +39,6 @@ add_executable( src/imageio.cpp src/imap.cpp src/intersect.cpp - src/jpeg.cpp src/light.cpp src/objbound.cpp src/parse.cpp diff --git a/third-party/tbb/examples/parallel_for/tachyon/src/imageio.cpp b/third-party/tbb/examples/parallel_for/tachyon/src/imageio.cpp index 30c61d7f..c1c9d762 100644 --- a/third-party/tbb/examples/parallel_for/tachyon/src/imageio.cpp +++ b/third-party/tbb/examples/parallel_for/tachyon/src/imageio.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -59,7 +59,6 @@ #include "imageio.hpp" #include "ppm.hpp" /* PPM files */ #include "tgafile.hpp" /* Truevision Targa files */ -#include "jpeg.hpp" /* JPEG files */ static int fakeimage(char *name, int *xres, int *yres, unsigned char **imgdata) { int i, imgsize; @@ -90,7 +89,7 @@ int readimage(rawimage *img) { rc = readtga(name, &xres, &yres, &imgdata); } else if (strstr(name, ".jpg")) { - rc = readjpeg(name, &xres, &yres, &imgdata); + rc = IMAGEUNSUP; } else if (strstr(name, ".gif")) { rc = IMAGEUNSUP; diff --git a/third-party/tbb/examples/parallel_for_each/parallel_preorder/CMakeLists.txt b/third-party/tbb/examples/parallel_for_each/parallel_preorder/CMakeLists.txt index 235604ab..8e98d360 100644 --- a/third-party/tbb/examples/parallel_for_each/parallel_preorder/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_for_each/parallel_preorder/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(parallel_preorder CXX) diff --git a/third-party/tbb/examples/parallel_pipeline/square/CMakeLists.txt b/third-party/tbb/examples/parallel_pipeline/square/CMakeLists.txt index a32eaaf8..184c787e 100644 --- a/third-party/tbb/examples/parallel_pipeline/square/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_pipeline/square/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(square CXX) diff --git a/third-party/tbb/examples/parallel_reduce/README.md b/third-party/tbb/examples/parallel_reduce/README.md index 481d8e18..0dba80ca 100644 --- a/third-party/tbb/examples/parallel_reduce/README.md +++ b/third-party/tbb/examples/parallel_reduce/README.md @@ -4,4 +4,5 @@ Examples using `parallel_reduce` algorithm. | Code sample name | Description |:--- |:--- | convex_hull | Parallel version of convex hull algorithm (quick hull). +| pi | Parallel version of calculating π by numerical integration. | primes | Parallel version of the Sieve of Eratosthenes. 
diff --git a/third-party/tbb/examples/parallel_reduce/convex_hull/CMakeLists.txt b/third-party/tbb/examples/parallel_reduce/convex_hull/CMakeLists.txt index de32d1de..0492244a 100644 --- a/third-party/tbb/examples/parallel_reduce/convex_hull/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_reduce/convex_hull/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(convex_hull_bench CXX) project(convex_hull_sample CXX) diff --git a/third-party/tbb/examples/parallel_reduce/pi/CMakeLists.txt b/third-party/tbb/examples/parallel_reduce/pi/CMakeLists.txt new file mode 100644 index 00000000..62ebe022 --- /dev/null +++ b/third-party/tbb/examples/parallel_reduce/pi/CMakeLists.txt @@ -0,0 +1,33 @@ +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +cmake_minimum_required(VERSION 3.5) + +project(pi CXX) + +include(../../common/cmake/common.cmake) + +set_common_project_settings(tbb) + +add_executable(pi main.cpp pi.cpp) + +target_link_libraries(pi TBB::tbb Threads::Threads) +target_compile_options(pi PRIVATE ${TBB_CXX_STD_FLAG}) + +set(EXECUTABLE "$") +set(ARGS "") +set(PERF_ARGS auto 100000000000) + +add_execution_target(run_pi pi ${EXECUTABLE} "${ARGS}") +add_execution_target(perf_run_pi pi ${EXECUTABLE} "${PERF_ARGS}") diff --git a/third-party/tbb/examples/parallel_reduce/pi/README.md b/third-party/tbb/examples/parallel_reduce/pi/README.md new file mode 100644 index 00000000..be7ce0d4 --- /dev/null +++ b/third-party/tbb/examples/parallel_reduce/pi/README.md @@ -0,0 +1,24 @@ +# Pi Sample +Parallel version of calculating π by numerical integration. + +## Build +To build the sample, run the following commands: +``` +cmake +cmake --build . +``` + +## Run +### Predefined Make Targets +* `make run_pi` - executes the example with predefined parameters +* `make perf_run_pi` - executes the example with suggested parameters to measure the oneTBB performance + +### Application Parameters +You can use the following application parameters: +``` +pi [n-of-threads=value] [n-of-intervals=value] [silent] [-h] [n-of-threads [n-of-intervals]] +``` +* `-h` - prints the help for command-line options. +* `n-of-threads` - the number of threads to use. This number is specified in the low\[:high\] range format, where both ``low`` and, optionally, ``high`` are non-negative integers. You can also use ``auto`` to let the system choose a default number of threads suitable for the platform. +* `n-of-intervals` - the number of intervals to subdivide into. Must be a positive integer. +* `silent` - no output except the elapsed time. 
diff --git a/third-party/tbb/examples/parallel_reduce/pi/common.h b/third-party/tbb/examples/parallel_reduce/pi/common.h new file mode 100644 index 00000000..0e316854 --- /dev/null +++ b/third-party/tbb/examples/parallel_reduce/pi/common.h @@ -0,0 +1,51 @@ +/* + Copyright (c) 2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef TBB_examples_pi_H +#define TBB_examples_pi_H + +#include + +typedef std::size_t number_t; +typedef double pi_t; + +extern const number_t chunk_size; +extern number_t num_intervals; +extern pi_t step; + +extern bool silent; + +inline pi_t pi_kernel(number_t i) { + pi_t dx = (pi_t(i) + pi_t(0.5)) * step; + return pi_t(4.0) / (pi_t(1.0) + dx * dx); +} + +inline double pi_slice_kernel(number_t slice, number_t slice_size = chunk_size) { + pi_t pi = pi_t(0.0); + for (number_t i = slice; i < slice + slice_size; ++i) { + pi += pi_kernel(i); + } + return pi; +} + +struct threading { + threading(int p); + ~threading(); +}; + +double compute_pi_parallel(); + +#endif // TBB_examples_pi_H diff --git a/third-party/tbb/examples/parallel_reduce/pi/main.cpp b/third-party/tbb/examples/parallel_reduce/pi/main.cpp new file mode 100644 index 00000000..81690617 --- /dev/null +++ b/third-party/tbb/examples/parallel_reduce/pi/main.cpp @@ -0,0 +1,100 @@ +/* + Copyright (c) 2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/tick_count.h" + +#include "common/utility/get_default_num_threads.hpp" +#include "common/utility/utility.hpp" + +#include "common.h" + +const number_t chunk_size = 4096; // Multiple of 16, to fit float datatype to a vector register. + +// number of intervals +number_t num_intervals = 1000000000; +pi_t step = pi_t(0.0); + +bool silent = false; + +double compute_pi_serial() { + double ret = 0; + + step = pi_t(1.0) / num_intervals; + + number_t tail = num_intervals % chunk_size; + number_t last = num_intervals - tail; + + for (number_t slice = 0; slice < last; slice += chunk_size) { + ret += pi_slice_kernel(slice); + } + ret += pi_slice_kernel(last, tail); + ret *= step; + + return ret; +} + +int main(int argc, char* argv[]) { + try { + tbb::tick_count main_start_time = tbb::tick_count::now(); + // zero number of threads means to run serial version + utility::thread_number_range threads(utility::get_default_num_threads, 0); + + utility::parse_cli_arguments( + argc, + argv, + utility::cli_argument_pack() + //"-h" option for for displaying help is present implicitly + .positional_arg(threads, "n-of-threads", utility::thread_number_range_desc) + .positional_arg(num_intervals, "n-of-intervals", "number of intervals") + .arg(silent, "silent", "no output except time elapsed")); + + for (int p = threads.first; p <= threads.last; p = threads.step(p)) { + pi_t pi; + double compute_time; + if (p == 0) { + //run a serial version + tbb::tick_count compute_start_time = tbb::tick_count::now(); + pi = compute_pi_serial(); + compute_time = 
(tbb::tick_count::now() - compute_start_time).seconds(); + } + else { + //run a parallel version + threading tp(p); + tbb::tick_count compute_start_time = tbb::tick_count::now(); + pi = compute_pi_parallel(); + compute_time = (tbb::tick_count::now() - compute_start_time).seconds(); + } + + if (!silent) { + if (p == 0) { + std::cout << "Serial run:\tpi = " << pi << "\tcompute time = " << compute_time + << " sec\n"; + } + else { + std::cout << "Parallel run:\tpi = " << pi << "\tcompute time = " << compute_time + << " sec\t on " << p << " threads\n"; + } + } + } + + utility::report_elapsed_time((tbb::tick_count::now() - main_start_time).seconds()); + return 0; + } + catch (std::exception& e) { + std::cerr << "error occurred. error text is :\"" << e.what() << "\"\n"; + return 1; + } +} diff --git a/third-party/tbb/examples/parallel_reduce/pi/pi.cpp b/third-party/tbb/examples/parallel_reduce/pi/pi.cpp new file mode 100644 index 00000000..230752a9 --- /dev/null +++ b/third-party/tbb/examples/parallel_reduce/pi/pi.cpp @@ -0,0 +1,55 @@ +/* + Copyright (c) 2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "common.h" +#include "oneapi/tbb/blocked_range.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/parallel_reduce.h" + +struct reduce_body { + double my_pi; + reduce_body() : my_pi(0) {} + reduce_body(reduce_body& x, tbb::split) : my_pi(0) {} + void operator()(const tbb::blocked_range& r) { + my_pi += pi_slice_kernel(r.begin(), r.size()); + } + void join(const reduce_body& y) { + my_pi += y.my_pi; + } +}; + +double compute_pi_parallel() { + step = pi_t(1.0) / num_intervals; + + double ret = 0.0; + + reduce_body body; + tbb::parallel_reduce(tbb::blocked_range(0, num_intervals), body); + + ret = body.my_pi * step; + + return ret; +} + +static std::unique_ptr gc; + +threading::threading(int p) { + gc.reset(new tbb::global_control(tbb::global_control::max_allowed_parallelism, p)); +} + +threading::~threading() { + gc.reset(); +} diff --git a/third-party/tbb/examples/parallel_reduce/primes/CMakeLists.txt b/third-party/tbb/examples/parallel_reduce/primes/CMakeLists.txt index dabd9682..987d4656 100644 --- a/third-party/tbb/examples/parallel_reduce/primes/CMakeLists.txt +++ b/third-party/tbb/examples/parallel_reduce/primes/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(primes CXX) diff --git a/third-party/tbb/examples/task_arena/fractal/CMakeLists.txt b/third-party/tbb/examples/task_arena/fractal/CMakeLists.txt index 888428b3..857dae64 100644 --- a/third-party/tbb/examples/task_arena/fractal/CMakeLists.txt +++ b/third-party/tbb/examples/task_arena/fractal/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(fractal CXX) diff --git a/third-party/tbb/examples/task_group/sudoku/CMakeLists.txt b/third-party/tbb/examples/task_group/sudoku/CMakeLists.txt index 5fea9ee6..f514662a 100644 --- a/third-party/tbb/examples/task_group/sudoku/CMakeLists.txt +++ b/third-party/tbb/examples/task_group/sudoku/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021 Intel Corporation +# Copyright (c) 2020-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(sudoku CXX) diff --git a/third-party/tbb/examples/test_all/fibonacci/CMakeLists.txt b/third-party/tbb/examples/test_all/fibonacci/CMakeLists.txt index 5c97e28a..3b2368e0 100644 --- a/third-party/tbb/examples/test_all/fibonacci/CMakeLists.txt +++ b/third-party/tbb/examples/test_all/fibonacci/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021 Intel Corporation +# Copyright (c) 2019-2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.5) project(fibonacci CXX) diff --git a/third-party/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h b/third-party/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h index ade91c33..40829208 100644 --- a/third-party/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h +++ b/third-party/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h @@ -921,7 +921,7 @@ class concurrent_unordered_base { node_allocator_traits::deallocate(dummy_node_allocator, node, 1); } else { // GCC 11.1 issues a warning here that incorrect destructor might be called for dummy_nodes - #if (__TBB_GCC_VERSION >= 110100 && __TBB_GCC_VERSION < 130000 ) && !__clang__ && !__INTEL_COMPILER + #if (__TBB_GCC_VERSION >= 110100 && __TBB_GCC_VERSION < 140000 ) && !__clang__ && !__INTEL_COMPILER volatile #endif value_node_ptr val_node = static_cast(node); diff --git a/third-party/tbb/include/oneapi/tbb/detail/_machine.h b/third-party/tbb/include/oneapi/tbb/detail/_machine.h index 7a4a1e31..ca481380 100644 --- a/third-party/tbb/include/oneapi/tbb/detail/_machine.h +++ b/third-party/tbb/include/oneapi/tbb/detail/_machine.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel 
Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -96,7 +96,7 @@ static inline void machine_pause(int32_t delay) { #if __TBB_x86_64 || __TBB_x86_32 while (delay-- > 0) { _mm_pause(); } #elif __ARM_ARCH_7A__ || __aarch64__ - while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } + while (delay-- > 0) { __asm__ __volatile__("isb sy" ::: "memory"); } #else /* Generic */ (void)delay; // suppress without including _template_helpers.h yield(); diff --git a/third-party/tbb/include/oneapi/tbb/enumerable_thread_specific.h b/third-party/tbb/include/oneapi/tbb/enumerable_thread_specific.h index 34bcab68..caa53fa0 100644 --- a/third-party/tbb/include/oneapi/tbb/enumerable_thread_specific.h +++ b/third-party/tbb/include/oneapi/tbb/enumerable_thread_specific.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,7 +36,15 @@ #include "task.h" // for task::suspend_point #if _WIN32 || _WIN64 +#ifndef NOMINMAX +#define NOMINMAX +#define __TBB_DEFINED_NOMINMAX 1 +#endif #include +#if __TBB_DEFINED_NOMINMAX +#undef NOMINMAX +#undef __TBB_DEFINED_NOMINMAX +#endif #else #include #endif diff --git a/third-party/tbb/include/oneapi/tbb/parallel_for.h b/third-party/tbb/include/oneapi/tbb/parallel_for.h index 91c7c44c..37a26135 100644 --- a/third-party/tbb/include/oneapi/tbb/parallel_for.h +++ b/third-party/tbb/include/oneapi/tbb/parallel_for.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -319,7 +319,7 @@ void parallel_for_impl(Index first, Index last, Index step, const Function& f, P template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f) { - parallel_for_impl(first, last, step, f, auto_partitioner()); + parallel_for_impl(first, last, step, f, __TBB_DEFAULT_PARTITIONER()); } //! Parallel iteration over a range of integers with a step provided and simple partitioner template @@ -350,7 +350,7 @@ void parallel_for(Index first, Index last, Index step, const Function& f, affini template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f) { - parallel_for_impl(first, last, static_cast(1), f, auto_partitioner()); + parallel_for_impl(first, last, static_cast(1), f, __TBB_DEFAULT_PARTITIONER()); } //! Parallel iteration over a range of integers with a default step value and simple partitioner template @@ -395,7 +395,7 @@ void parallel_for_impl(Index first, Index last, Index step, const Function& f, P template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { - parallel_for_impl(first, last, step, f, auto_partitioner(), context); + parallel_for_impl(first, last, step, f, __TBB_DEFAULT_PARTITIONER(), context); } //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner template @@ -426,7 +426,7 @@ void parallel_for(Index first, Index last, Index step, const Function& f, affini template __TBB_requires(parallel_for_index && parallel_for_function) void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { - parallel_for_impl(first, last, static_cast(1), f, auto_partitioner(), context); + parallel_for_impl(first, last, static_cast(1), f, __TBB_DEFAULT_PARTITIONER(), context); } //! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner template diff --git a/third-party/tbb/include/oneapi/tbb/parallel_for_each.h b/third-party/tbb/include/oneapi/tbb/parallel_for_each.h index 56dbeb41..ab0b3453 100644 --- a/third-party/tbb/include/oneapi/tbb/parallel_for_each.h +++ b/third-party/tbb/include/oneapi/tbb/parallel_for_each.h @@ -407,6 +407,34 @@ class parallel_for_body_wrapper { template using tag = typename std::iterator_traits::iterator_category; +#if __TBB_CPP20_PRESENT +template +struct move_iterator_dispatch_helper { + using type = It; +}; + +// Until C++23, std::move_iterator::iterator_concept always defines +// to std::input_iterator_tag and hence std::forward_iterator concept +// always evaluates to false, so std::move_iterator dispatch should be +// made according to the base iterator type. +template +struct move_iterator_dispatch_helper> { + using type = It; +}; + +template +using iterator_tag_dispatch_impl = + std::conditional_t, + std::random_access_iterator_tag, + std::conditional_t, + std::forward_iterator_tag, + std::input_iterator_tag>>; + +template +using iterator_tag_dispatch = + iterator_tag_dispatch_impl::type>; + +#else template using iterator_tag_dispatch = typename std::conditional< @@ -418,6 +446,7 @@ using iterator_tag_dispatch = typename std::input_iterator_tag >::type >::type; +#endif // __TBB_CPP20_PRESENT template using feeder_is_required = tbb::detail::void_t(), diff --git a/third-party/tbb/include/oneapi/tbb/parallel_reduce.h b/third-party/tbb/include/oneapi/tbb/parallel_reduce.h index 401ad004..205c97ef 100644 --- a/third-party/tbb/include/oneapi/tbb/parallel_reduce.h +++ b/third-party/tbb/include/oneapi/tbb/parallel_reduce.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the 
License. @@ -42,16 +42,16 @@ concept parallel_reduce_body = splittable && template concept parallel_reduce_function = std::invocable&, - const Range&, const Value&> && + const Range&, Value&&> && std::convertible_to&, - const Range&, const Value&>, + const Range&, Value&&>, Value>; template concept parallel_reduce_combine = std::invocable&, - const Value&, const Value&> && + Value&&, Value&&> && std::convertible_to&, - const Value&, const Value&>, + Value&&, Value&&>, Value>; } // namespace d0 @@ -390,14 +390,15 @@ class lambda_reduce_body { , my_value(other.my_identity_element) { } void operator()(Range& range) { - my_value = tbb::detail::invoke(my_real_body, range, const_cast(my_value)); + my_value = tbb::detail::invoke(my_real_body, range, std::move(my_value)); } + void join( lambda_reduce_body& rhs ) { - my_value = tbb::detail::invoke(my_reduction, const_cast(my_value), - const_cast(rhs.my_value)); + my_value = tbb::detail::invoke(my_reduction, std::move(my_value), std::move(rhs.my_value)); } - Value result() const { - return my_value; + + __TBB_nodiscard Value&& result() && noexcept { + return std::move(my_value); } }; @@ -514,7 +515,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const __TBB_DEFAULT_PARTITIONER> ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and simple_partitioner. @@ -527,7 +528,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const simple_partitioner> ::run(range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! 
Parallel iteration with reduction and auto_partitioner @@ -540,7 +541,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const auto_partitioner> ::run( range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and static_partitioner @@ -553,7 +554,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const static_partitioner> ::run( range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and affinity_partitioner @@ -566,7 +567,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,affinity_partitioner> ::run( range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, default partitioner and user-supplied context. @@ -579,7 +580,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const __TBB_DEFAULT_PARTITIONER> ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, simple partitioner and user-supplied context. @@ -592,7 +593,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const simple_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! 
Parallel iteration with reduction, auto_partitioner and user-supplied context @@ -605,7 +606,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const auto_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, static_partitioner and user-supplied context @@ -618,7 +619,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,const static_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, affinity_partitioner and user-supplied context @@ -631,7 +632,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body body(identity, real_body, reduction); start_reduce,affinity_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction and default simple partitioner. @@ -704,7 +705,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const simple_partitioner> ::run(range, body, partitioner); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction and static partitioner. @@ -716,7 +717,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const static_partitioner> ::run(range, body, partitioner); - return body.result(); + return std::move(body).result(); } //! 
Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. @@ -739,7 +740,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const simple_partitioner> ::run(range, body, partitioner, context); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. @@ -752,7 +753,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body body(identity, real_body, reduction); start_deterministic_reduce, const static_partitioner> ::run(range, body, partitioner, context); - return body.result(); + return std::move(body).result(); } //@} diff --git a/third-party/tbb/include/oneapi/tbb/parallel_scan.h b/third-party/tbb/include/oneapi/tbb/parallel_scan.h index 6d2a4d64..d624f7eb 100644 --- a/third-party/tbb/include/oneapi/tbb/parallel_scan.h +++ b/third-party/tbb/include/oneapi/tbb/parallel_scan.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -562,7 +562,7 @@ class lambda_scan_body { template __TBB_requires(tbb_range && parallel_scan_body) void parallel_scan( const Range& range, Body& body ) { - start_scan::run(range,body,__TBB_DEFAULT_PARTITIONER()); + start_scan::run(range,body,__TBB_DEFAULT_PARTITIONER()); } //! 
Parallel prefix with simple_partitioner diff --git a/third-party/tbb/include/oneapi/tbb/version.h b/third-party/tbb/include/oneapi/tbb/version.h index db4f5f20..fff3e7e2 100644 --- a/third-party/tbb/include/oneapi/tbb/version.h +++ b/third-party/tbb/include/oneapi/tbb/version.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,18 +29,22 @@ // Product version #define TBB_VERSION_MAJOR 2021 // Update version -#define TBB_VERSION_MINOR 11 +#define TBB_VERSION_MINOR 13 // "Patch" version for custom releases #define TBB_VERSION_PATCH 0 // Suffix string #define __TBB_VERSION_SUFFIX "" // Full official version string -#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX +#define TBB_VERSION_STRING \ + __TBB_STRING(TBB_VERSION_MAJOR) "." \ + __TBB_STRING(TBB_VERSION_MINOR) "." \ + __TBB_STRING(TBB_VERSION_PATCH) \ + __TBB_VERSION_SUFFIX // OneAPI oneTBB specification version #define ONETBB_SPEC_VERSION "1.0" // Full interface version -#define TBB_INTERFACE_VERSION 12110 +#define TBB_INTERFACE_VERSION 12130 // Major interface version #define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) // Minor interface version @@ -51,37 +55,37 @@ #define __TBB_BINARY_VERSION 12 //! 
TBB_VERSION support -#ifndef ENDL -#define ENDL "\n" +#ifndef TBB_ENDL +#define TBB_ENDL "\n" #endif //TBB_REVAMP_TODO: consider enabling version_string.ver generation //TBB_REVAMP_TODO: #include "version_string.ver" -#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL -#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL -#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL +#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION TBB_ENDL +#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING TBB_ENDL +#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) TBB_ENDL #ifndef TBB_USE_DEBUG - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" TBB_ENDL #elif TBB_USE_DEBUG==0 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" TBB_ENDL #elif TBB_USE_DEBUG==1 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" TBB_ENDL #elif TBB_USE_DEBUG==2 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" TBB_ENDL #else #error Unexpected value for TBB_USE_DEBUG #endif #ifndef TBB_USE_ASSERT - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" TBB_ENDL #elif TBB_USE_ASSERT==0 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" TBB_ENDL #elif TBB_USE_ASSERT==1 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" 
TBB_ENDL #elif TBB_USE_ASSERT==2 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" TBB_ENDL #else #error Unexpected value for TBB_USE_ASSERT #endif diff --git a/third-party/tbb/integration/linux/modulefiles/tbb b/third-party/tbb/integration/linux/modulefiles/tbb index ab08c352..b8c695ed 100644 --- a/third-party/tbb/integration/linux/modulefiles/tbb +++ b/third-party/tbb/integration/linux/modulefiles/tbb @@ -66,3 +66,4 @@ prepend-path CPATH "$tbbroot/include" prepend-path LIBRARY_PATH "$tbbroot/lib" prepend-path LD_LIBRARY_PATH "$tbbroot/lib" prepend-path CMAKE_PREFIX_PATH "$tbbroot" +prepend-path PKG_CONFIG_PATH "$tbbroot/lib/pkgconfig" diff --git a/third-party/tbb/integration/linux/modulefiles/tbb32 b/third-party/tbb/integration/linux/modulefiles/tbb32 index 9d0efc5a..db341351 100644 --- a/third-party/tbb/integration/linux/modulefiles/tbb32 +++ b/third-party/tbb/integration/linux/modulefiles/tbb32 @@ -66,3 +66,4 @@ prepend-path CPATH "$tbbroot/include32:$tbbroot/include" prepend-path LIBRARY_PATH "$tbbroot/lib32" prepend-path LD_LIBRARY_PATH "$tbbroot/lib32" prepend-path CMAKE_PREFIX_PATH "$tbbroot" +prepend-path PKG_CONFIG_PATH "$tbbroot/lib32/pkgconfig" diff --git a/third-party/tbb/integration/windows/nuget/inteltbb.devel.win.targets b/third-party/tbb/integration/windows/nuget/inteltbb.devel.win.targets index ab1f244f..1c94a12c 100644 --- a/third-party/tbb/integration/windows/nuget/inteltbb.devel.win.targets +++ b/third-party/tbb/integration/windows/nuget/inteltbb.devel.win.targets @@ -1,6 +1,6 @@ - $(MSBuildThisFileDirectory)..\..\lib\native\include;%(AdditionalIncludeDirectories) + $(MSBuildThisFileDirectory)..\..\build\native\include;%(AdditionalIncludeDirectories) TBB_USE_DEBUG;%(PreprocessorDefinitions) @@ -27,25 +27,25 @@ - $(MSBuildThisFileDirectory)..\..\lib\native\win-x86;%(AdditionalLibraryDirectories) + 
$(MSBuildThisFileDirectory)..\..\build\native\win-x86;%(AdditionalLibraryDirectories) tbb12.lib;tbbmalloc.lib;tbbmalloc_proxy.lib;%(AdditionalDependencies) - $(MSBuildThisFileDirectory)..\..\lib\native\win-x64;%(AdditionalLibraryDirectories) + $(MSBuildThisFileDirectory)..\..\build\native\win-x64;%(AdditionalLibraryDirectories) tbb12.lib;tbbmalloc.lib;tbbmalloc_proxy.lib;%(AdditionalDependencies) - $(MSBuildThisFileDirectory)..\..\lib\native\win-x86;%(AdditionalLibraryDirectories) + $(MSBuildThisFileDirectory)..\..\build\native\win-x86;%(AdditionalLibraryDirectories) tbb12_debug.lib;tbbmalloc_debug.lib;tbbmalloc_proxy_debug.lib;%(AdditionalDependencies) - $(MSBuildThisFileDirectory)..\..\lib\native\win-x64;%(AdditionalLibraryDirectories) + $(MSBuildThisFileDirectory)..\..\build\native\win-x64;%(AdditionalLibraryDirectories) tbb12_debug.lib;tbbmalloc_debug.lib;tbbmalloc_proxy_debug.lib;%(AdditionalDependencies) diff --git a/third-party/tbb/python/tbb/pool.py b/third-party/tbb/python/tbb/pool.py index a372324d..dd5c8190 100644 --- a/third-party/tbb/python/tbb/pool.py +++ b/third-party/tbb/python/tbb/pool.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016-2023 Intel Corporation +# Copyright (c) 2016-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -89,8 +89,8 @@ class Pool(object): def __init__(self, nworkers=0, name="Pool"): """ - \param nworkers (integer) number of worker threads to start - \param name (string) prefix for the worker threads' name + :param nworkers (integer) number of worker threads to start + :param name (string) prefix for the worker threads' name """ self._closed = False self._tasks = task_group() @@ -268,8 +268,8 @@ class Job: def __init__(self, func, args, kwds, apply_result): """ - \param func/args/kwds used to call the function - \param apply_result ApplyResult object that holds the result + :param func/args/kwds used to call the function + :param apply_result ApplyResult object that holds the result of the function call """ self._func = func @@ -317,10 +317,10 @@ class ApplyResult(object): def __init__(self, collector=None, callback=None): """ - \param collector when not None, the notify_ready() method of + :param collector when not None, the notify_ready() method of the collector will be called when the result from the Job is ready - \param callback when not None, function to call when the + :param callback when not None, function to call when the result becomes available (this is the parameter passed to the Pool::*_async() methods. """ @@ -404,7 +404,7 @@ class AbstractResultCollector(object): def __init__(self, to_notify): """ - \param to_notify ApplyResult object to notify when all the + :param to_notify ApplyResult object to notify when all the results we're waiting for become available. Can be None. 
""" self._to_notify = to_notify @@ -414,7 +414,7 @@ def register_result(self, apply_result): always be called BEFORE the Jobs get submitted to the work queue, and BEFORE the __iter__ and _get_result() methods can be called - \param apply_result ApplyResult object to add in our collection + :param apply_result ApplyResult object to add in our collection """ raise NotImplementedError("Children classes must implement it") @@ -422,7 +422,7 @@ def notify_ready(self, apply_result): """Called by the ApplyResult object (already registered via register_result()) that it is now ready (ie. the Job's result is available or an exception has been raised). - \param apply_result ApplyResult object telling us that the job + :param apply_result ApplyResult object telling us that the job has been processed """ raise NotImplementedError("Children classes must implement it") @@ -431,8 +431,8 @@ def _get_result(self, idx, timeout=None): """Called by the CollectorIterator object to retrieve the result's values one after another (order defined by the implementation) - \param idx The index of the result we want, wrt collector's order - \param timeout integer telling how long to wait (in seconds) + :param idx The index of the result we want, wrt collector's order + :param timeout integer telling how long to wait (in seconds) for the result at index idx to be available, or None (wait forever) """ @@ -450,7 +450,7 @@ class CollectorIterator(object): AbstractResultCollector::__iter__() method""" def __init__(self, collector): - """\param AbstractResultCollector instance""" + """:param AbstractResultCollector instance""" self._collector = collector self._idx = 0 @@ -486,7 +486,7 @@ class UnorderedResultCollector(AbstractResultCollector): def __init__(self, to_notify=None): """ - \param to_notify ApplyResult object to notify when all the + :param to_notify ApplyResult object to notify when all the results we're waiting for become available. Can be None. 
""" AbstractResultCollector.__init__(self, to_notify) @@ -499,7 +499,7 @@ def register_result(self, apply_result): always be called BEFORE the Jobs get submitted to the work queue, and BEFORE the __iter__ and _get_result() methods can be called - \param apply_result ApplyResult object to add in our collection + :param apply_result ApplyResult object to add in our collection """ self._expected += 1 @@ -507,8 +507,8 @@ def _get_result(self, idx, timeout=None): """Called by the CollectorIterator object to retrieve the result's values one after another, in the order the results have become available. - \param idx The index of the result we want, wrt collector's order - \param timeout integer telling how long to wait (in seconds) + :param idx The index of the result we want, wrt collector's order + :param timeout integer telling how long to wait (in seconds) for the result at index idx to be available, or None (wait forever) """ @@ -535,7 +535,7 @@ def notify_ready(self, apply_result=None): """Called by the ApplyResult object (already registered via register_result()) that it is now ready (ie. the Job's result is available or an exception has been raised). - \param apply_result ApplyResult object telling us that the job + :param apply_result ApplyResult object telling us that the job has been processed """ first_item = False @@ -560,9 +560,9 @@ class OrderedResultCollector(AbstractResultCollector): def __init__(self, to_notify=None, as_iterator=True): """ - \param to_notify ApplyResult object to notify when all the + :param to_notify ApplyResult object to notify when all the results we're waiting for become available. Can be None. 
- \param as_iterator boolean telling whether the result value + :param as_iterator boolean telling whether the result value set on to_notify should be an iterator (available as soon as 1 result arrived) or a list (available only after the last result arrived) @@ -578,7 +578,7 @@ def register_result(self, apply_result): always be called BEFORE the Jobs get submitted to the work queue, and BEFORE the __iter__ and _get_result() methods can be called - \param apply_result ApplyResult object to add in our collection + :param apply_result ApplyResult object to add in our collection """ self._results.append(apply_result) self._remaining += 1 @@ -587,8 +587,8 @@ def _get_result(self, idx, timeout=None): """Called by the CollectorIterator object to retrieve the result's values one after another (order defined by the implementation) - \param idx The index of the result we want, wrt collector's order - \param timeout integer telling how long to wait (in seconds) + :param idx The index of the result we want, wrt collector's order + :param timeout integer telling how long to wait (in seconds) for the result at index idx to be available, or None (wait forever) """ @@ -600,7 +600,7 @@ def notify_ready(self, apply_result): """Called by the ApplyResult object (already registered via register_result()) that it is now ready (ie. the Job's result is available or an exception has been raised). 
- \param apply_result ApplyResult object telling us that the job + :param apply_result ApplyResult object telling us that the job has been processed """ got_first = False diff --git a/third-party/tbb/src/tbb/CMakeLists.txt b/third-party/tbb/src/tbb/CMakeLists.txt index e3df9fc9..b996c736 100644 --- a/third-party/tbb/src/tbb/CMakeLists.txt +++ b/third-party/tbb/src/tbb/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -126,59 +126,71 @@ target_link_libraries(tbb ${TBB_COMMON_LINK_LIBS} ) -tbb_install_target(tbb) - -if (MSVC) - # Create a copy of target linker file (tbb[_debug].lib) with legacy name (tbb[_debug].lib) - # to support previous user experience for linkage. - install(FILES - $ - DESTINATION lib - CONFIGURATIONS RelWithDebInfo Release MinSizeRel - RENAME tbb.lib - COMPONENT devel - ) - - install(FILES - $ - DESTINATION lib - CONFIGURATIONS Debug - RENAME tbb_debug.lib - COMPONENT devel - ) -endif() - -set(_tbb_pc_lib_name tbb) - -if (WIN32) - set(_tbb_pc_lib_name ${_tbb_pc_lib_name}${TBB_BINARY_VERSION}) -endif() - -if (CMAKE_SIZEOF_VOID_P EQUAL 8) - set(TBB_PC_NAME tbb) -else() - set(TBB_PC_NAME tbb32) +if(TBB_BUILD_APPLE_FRAMEWORKS) + set_target_properties(tbb PROPERTIES + FRAMEWORK TRUE + FRAMEWORK_VERSION ${TBB_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER com.intel.tbb + MACOSX_FRAMEWORK_IDENTIFIER com.intel.tbb + MACOSX_FRAMEWORK_BUNDLE_VERSION ${TBB_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} + MACOSX_FRAMEWORK_SHORT_VERSION_STRING ${TBB_BINARY_VERSION}) endif() -set(_prefix_for_pc_file "${CMAKE_INSTALL_PREFIX}") - -if (IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}") - set(_libdir_for_pc_file "${CMAKE_INSTALL_LIBDIR}") -else() - set(_libdir_for_pc_file "\${prefix}/${CMAKE_INSTALL_LIBDIR}") -endif() 
+tbb_install_target(tbb) -if (IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") - set(_includedir_for_pc_file "${CMAKE_INSTALL_INCLUDEDIR}") -else() - set(_includedir_for_pc_file "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") +if (TBB_INSTALL) + if (MSVC) + # Create a copy of target linker file (tbb[_debug].lib) with legacy name (tbb[_debug].lib) + # to support previous user experience for linkage. + install(FILES + $ + DESTINATION lib + CONFIGURATIONS RelWithDebInfo Release MinSizeRel + RENAME tbb.lib + COMPONENT devel + ) + + install(FILES + $ + DESTINATION lib + CONFIGURATIONS Debug + RENAME tbb_debug.lib + COMPONENT devel + ) + endif() + + set(_tbb_pc_lib_name tbb) + + if (WIN32) + set(_tbb_pc_lib_name ${_tbb_pc_lib_name}${TBB_BINARY_VERSION}) + endif() + + if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(TBB_PC_NAME tbb) + else() + set(TBB_PC_NAME tbb32) + endif() + + set(_prefix_for_pc_file "${CMAKE_INSTALL_PREFIX}") + + if (IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}") + set(_libdir_for_pc_file "${CMAKE_INSTALL_LIBDIR}") + else() + set(_libdir_for_pc_file "\${prefix}/${CMAKE_INSTALL_LIBDIR}") + endif() + + if (IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") + set(_includedir_for_pc_file "${CMAKE_INSTALL_INCLUDEDIR}") + else() + set(_includedir_for_pc_file "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") + endif() + + configure_file(${PROJECT_SOURCE_DIR}/integration/pkg-config/tbb.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${TBB_PC_NAME}.pc @ONLY) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${TBB_PC_NAME}.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/ + COMPONENT devel) endif() -configure_file(${PROJECT_SOURCE_DIR}/integration/pkg-config/tbb.pc.in ${CMAKE_CURRENT_BINARY_DIR}/${TBB_PC_NAME}.pc @ONLY) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${TBB_PC_NAME}.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/ - COMPONENT devel) - if (COMMAND tbb_gen_vars) tbb_gen_vars(tbb) endif() diff --git a/third-party/tbb/src/tbb/arena.cpp b/third-party/tbb/src/tbb/arena.cpp index 41770fe5..0e7cf43c 100644 --- 
a/third-party/tbb/src/tbb/arena.cpp +++ b/third-party/tbb/src/tbb/arena.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -60,7 +60,6 @@ numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_s if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) { binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core); __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction"); - binding_observer->observe(true); } return binding_observer; } @@ -396,7 +395,7 @@ bool arena::is_top_priority() const { } bool arena::try_join() { - if (num_workers_active() < my_num_workers_allotted.load(std::memory_order_relaxed)) { + if (is_joinable()) { my_references += arena::ref_worker; return true; } @@ -545,7 +544,7 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { .set_max_threads_per_core(ta.max_threads_per_core()) .set_numa_id(ta.my_numa_id); #endif /*__TBB_ARENA_BINDING*/ - + if (ta.my_max_concurrency < 1) { #if __TBB_ARENA_BINDING ta.my_max_concurrency = (int)default_concurrency(arena_constraints); @@ -554,6 +553,17 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { #endif /*!__TBB_ARENA_BINDING*/ } +#if __TBB_CPUBIND_PRESENT + numa_binding_observer* observer = construct_binding_observer( + static_cast(&ta), arena::num_arena_slots(ta.my_max_concurrency, ta.my_num_reserved_slots), + ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); + if (observer) { + // TODO: Consider lazy initialization for internal arena so + // the direct calls to observer might be omitted until actual initialization. 
+ observer->on_scheduler_entry(true); + } +#endif /*__TBB_CPUBIND_PRESENT*/ + __TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized"); unsigned priority_level = arena_priority_level(ta.my_priority); threading_control* thr_control = threading_control::register_public_reference(); @@ -561,8 +571,11 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { ta.my_arena.store(&a, std::memory_order_release); #if __TBB_CPUBIND_PRESENT - a.my_numa_binding_observer = construct_binding_observer( - static_cast(&ta), a.my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); + a.my_numa_binding_observer = observer; + if (observer) { + observer->on_scheduler_exit(true); + observer->observe(true); + } #endif /*__TBB_CPUBIND_PRESENT*/ } diff --git a/third-party/tbb/src/tbb/arena.h b/third-party/tbb/src/tbb/arena.h index 61bda0bf..1e95f117 100644 --- a/third-party/tbb/src/tbb/arena.h +++ b/third-party/tbb/src/tbb/arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -385,6 +385,10 @@ class arena: public padded bool is_top_priority() const; + bool is_joinable() const { + return num_workers_active() < my_num_workers_allotted.load(std::memory_order_relaxed); + } + bool try_join(); void set_allotment(unsigned allotment); @@ -429,8 +433,7 @@ void arena::advertise_new_work() { workers_delta = 1; } - bool wakeup_workers = is_mandatory_needed || are_workers_needed; - request_workers(mandatory_delta, workers_delta, wakeup_workers); + request_workers(mandatory_delta, workers_delta, /* wakeup_threads = */ true); } } diff --git a/third-party/tbb/src/tbb/concurrent_monitor.h b/third-party/tbb/src/tbb/concurrent_monitor.h index 3d20ef5b..3e5c4beb 100644 --- a/third-party/tbb/src/tbb/concurrent_monitor.h +++ b/third-party/tbb/src/tbb/concurrent_monitor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -290,7 +290,17 @@ class concurrent_monitor_base { n = my_waitset.front(); if (n != end) { my_waitset.remove(*n); + +// GCC 12.x-13.x issues a warning here that to_wait_node(n)->my_is_in_list might have size 0, since n is +// a base_node pointer. (This cannot happen, because only wait_node pointers are added to my_waitset.) 
+#if (__TBB_GCC_VERSION >= 120100 && __TBB_GCC_VERSION < 140000 ) && !__clang__ && !__INTEL_COMPILER +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); +#if (__TBB_GCC_VERSION >= 120100 && __TBB_GCC_VERSION < 140000 ) && !__clang__ && !__INTEL_COMPILER +#pragma GCC diagnostic pop +#endif } } diff --git a/third-party/tbb/src/tbb/dynamic_link.cpp b/third-party/tbb/src/tbb/dynamic_link.cpp index 2d88f8bc..a21beb5a 100644 --- a/third-party/tbb/src/tbb/dynamic_link.cpp +++ b/third-party/tbb/src/tbb/dynamic_link.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -34,7 +34,8 @@ // Unify system calls #define dlopen( name, flags ) LoadLibrary( name ) #define dlsym( handle, name ) GetProcAddress( handle, name ) - #define dlclose( handle ) ( ! FreeLibrary( handle ) ) + // FreeLibrary return bool value that is not used. + #define dlclose( handle ) (void)( ! FreeLibrary( handle ) ) #define dlerror() GetLastError() #ifndef PATH_MAX #define PATH_MAX MAX_PATH diff --git a/third-party/tbb/src/tbb/global_control.cpp b/third-party/tbb/src/tbb/global_control.cpp index 12c146c2..127fc92d 100644 --- a/third-party/tbb/src/tbb/global_control.cpp +++ b/third-party/tbb/src/tbb/global_control.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include "oneapi/tbb/detail/_config.h" #include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/cache_aligned_allocator.h" #include "oneapi/tbb/global_control.h" #include "oneapi/tbb/tbb_allocator.h" #include "oneapi/tbb/spin_mutex.h" @@ -49,6 +50,7 @@ class control_storage { std::set> my_list{}; spin_mutex my_list_mutex{}; public: + virtual ~control_storage() = default; virtual std::size_t default_value() const = 0; virtual void apply_active(std::size_t new_active) { my_active_value = new_active; @@ -138,11 +140,22 @@ class alignas(max_nfs_size) lifetime_control : public control_storage { } }; -static allowed_parallelism_control allowed_parallelism_ctl; -static stack_size_control stack_size_ctl; -static terminate_on_exception_control terminate_on_exception_ctl; -static lifetime_control lifetime_ctl; -static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl}; +static control_storage* controls[] = {nullptr, nullptr, nullptr, nullptr}; + +void global_control_acquire() { + controls[0] = new (cache_aligned_allocate(sizeof(allowed_parallelism_control))) allowed_parallelism_control{}; + controls[1] = new (cache_aligned_allocate(sizeof(stack_size_control))) stack_size_control{}; + controls[2] = new (cache_aligned_allocate(sizeof(terminate_on_exception_control))) terminate_on_exception_control{}; + controls[3] = new (cache_aligned_allocate(sizeof(lifetime_control))) lifetime_control{}; +} + +void global_control_release() { + for (auto& ptr : controls) { + ptr->~control_storage(); + cache_aligned_deallocate(ptr); + ptr = nullptr; + } +} void global_control_lock() { for (auto& ctl : controls) { diff --git a/third-party/tbb/src/tbb/governor.cpp b/third-party/tbb/src/tbb/governor.cpp index 1a66f5de..55175196 100644 --- a/third-party/tbb/src/tbb/governor.cpp +++ b/third-party/tbb/src/tbb/governor.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 
2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -42,6 +42,8 @@ namespace detail { namespace r1 { void clear_address_waiter_table(); +void global_control_acquire(); +void global_control_release(); //! global_control.cpp contains definition bool remove_and_check_if_empty(d1::global_control& gc); @@ -60,6 +62,7 @@ namespace system_topology { //------------------------------------------------------------------------ void governor::acquire_resources () { + global_control_acquire(); #if __TBB_USE_POSIX int status = theTLS.create(auto_terminate); #else @@ -85,6 +88,7 @@ void governor::release_resources () { system_topology::destroy(); dynamic_unlink_all(); + global_control_release(); } rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { diff --git a/third-party/tbb/src/tbb/governor.h b/third-party/tbb/src/tbb/governor.h index 9d5e94d3..573443d7 100644 --- a/third-party/tbb/src/tbb/governor.h +++ b/third-party/tbb/src/tbb/governor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -138,6 +138,8 @@ class governor { static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } #endif + static bool hybrid_cpu() { return cpu_features.hybrid; } + static bool rethrow_exception_broken() { return is_rethrow_broken; } static bool is_itt_present() { diff --git a/third-party/tbb/src/tbb/misc.cpp b/third-party/tbb/src/tbb/misc.cpp index 17da1238..115a5f38 100644 --- a/third-party/tbb/src/tbb/misc.cpp +++ b/third-party/tbb/src/tbb/misc.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -92,6 +92,8 @@ void PrintExtraVersionInfo( const char* category, const char* format, ... ) { //! check for transaction support. #if _MSC_VER #include // for __cpuid +#elif __APPLE__ +#include #endif #if __TBB_x86_32 || __TBB_x86_64 @@ -131,13 +133,22 @@ void detect_cpu_features(cpu_features_type& cpu_features) { #if __TBB_x86_32 || __TBB_x86_64 const int rtm_ebx_mask = 1 << 11; const int waitpkg_ecx_mask = 1 << 5; + const int hybrid_edx_mask = 1 << 15; int registers[4] = {0}; - // Check RTM and WAITPKG + // Check RTM, WAITPKG, HYBRID check_cpuid(7, 0, registers); cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0; cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0; -#endif /* (__TBB_x86_32 || __TBB_x86_64) */ + cpu_features.hybrid = (registers[3] & hybrid_edx_mask) != 0; +#elif __APPLE__ + // Check HYBRID (hw.nperflevels > 1) + uint64_t nperflevels = 0; + size_t nperflevels_size = sizeof(nperflevels); + if (!sysctlbyname("hw.nperflevels", &nperflevels, &nperflevels_size, nullptr, 0)) { + cpu_features.hybrid = (nperflevels > 1); + } +#endif } } // namespace r1 diff --git a/third-party/tbb/src/tbb/misc.h b/third-party/tbb/src/tbb/misc.h index b11c0029..988c29b1 100644 --- a/third-party/tbb/src/tbb/misc.h +++ b/third-party/tbb/src/tbb/misc.h 
@@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -211,6 +211,7 @@ T1 atomic_update(std::atomic& dst, T1 newValue, Pred compare) { struct cpu_features_type { bool rtm_enabled{false}; bool waitpkg_enabled{false}; + bool hybrid{false}; }; void detect_cpu_features(cpu_features_type& cpu_features); diff --git a/third-party/tbb/src/tbb/scheduler_common.h b/third-party/tbb/src/tbb/scheduler_common.h index 56610ffe..f9e8a68d 100644 --- a/third-party/tbb/src/tbb/scheduler_common.h +++ b/third-party/tbb/src/tbb/scheduler_common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -268,12 +268,7 @@ class stealing_loop_backoff { // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount. stealing_loop_backoff(int num_workers, int yields_multiplier) : my_pause_threshold{ 2 * (num_workers + 1) } -#if __APPLE__ - // threshold value tuned separately for macOS due to high cost of sched_yield there - , my_yield_threshold{10 * yields_multiplier} -#else , my_yield_threshold{100 * yields_multiplier} -#endif , my_pause_count{} , my_yield_count{} {} diff --git a/third-party/tbb/src/tbb/tbb.rc b/third-party/tbb/src/tbb/tbb.rc index 6c8b99fc..57e9d391 100644 --- a/third-party/tbb/src/tbb/tbb.rc +++ b/third-party/tbb/src/tbb/tbb.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2023 Intel Corporation +// Copyright (c) 2005-2024 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -54,7 +54,7 @@ BEGIN VALUE "CompanyName", "Intel Corporation\0" VALUE "FileDescription", "oneAPI Threading Building Blocks (oneTBB) library\0" VALUE "FileVersion", TBB_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2005-2023 Intel Corporation. All Rights Reserved.\0" + VALUE "LegalCopyright", "Copyright 2005-2024 Intel Corporation. All Rights Reserved.\0" VALUE "LegalTrademarks", "\0" #ifndef TBB_USE_DEBUG VALUE "OriginalFilename", "tbb12.dll\0" diff --git a/third-party/tbb/src/tbb/tcm_adaptor.cpp b/third-party/tbb/src/tbb/tcm_adaptor.cpp index 3963ae13..e20ebb83 100644 --- a/third-party/tbb/src/tbb/tcm_adaptor.cpp +++ b/third-party/tbb/src/tbb/tcm_adaptor.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Intel Corporation + Copyright (c) 2023-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -137,11 +137,9 @@ class tcm_client : public pm_client { // The permit has changed during the reading, so the callback will be invoked soon one more time and // we can just skip this renegotiation iteration. if (!new_permit.flags.stale) { - __TBB_ASSERT( - new_permit.state != TCM_PERMIT_STATE_INACTIVE || new_concurrency == 0, - "TCM did not nullify resources while deactivating the permit" - ); - delta = update_concurrency(new_concurrency); + // If there is no other demand in TCM, the permit may still have granted concurrency but + // be in the deactivated state thus we enforce 0 allotment to preserve arena invariants. + delta = update_concurrency(new_permit.state != TCM_PERMIT_STATE_INACTIVE ? 
new_concurrency : 0); } } if (delta) { diff --git a/third-party/tbb/src/tbb/thread_dispatcher.cpp b/third-party/tbb/src/tbb/thread_dispatcher.cpp index 8f33dc06..69a108d6 100644 --- a/third-party/tbb/src/tbb/thread_dispatcher.cpp +++ b/third-party/tbb/src/tbb/thread_dispatcher.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -164,6 +164,18 @@ thread_dispatcher_client* thread_dispatcher::client_in_need(thread_dispatcher_cl return client_in_need(my_client_list, my_next_client); } +bool thread_dispatcher::is_any_client_in_need() { + client_list_mutex_type::scoped_lock lock(my_list_mutex, /*is_writer=*/false); + for (auto& priority_list : my_client_list) { + for (auto& client : priority_list) { + if (client.is_joinable()) { + return true; + } + } + } + return false; +} + void thread_dispatcher::adjust_job_count_estimate(int delta) { my_server->adjust_job_count_estimate(delta); } diff --git a/third-party/tbb/src/tbb/thread_dispatcher.h b/third-party/tbb/src/tbb/thread_dispatcher.h index f11344ca..e511e2b7 100644 --- a/third-party/tbb/src/tbb/thread_dispatcher.h +++ b/third-party/tbb/src/tbb/thread_dispatcher.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -44,6 +44,7 @@ class thread_dispatcher : no_copy, rml::tbb_client { thread_dispatcher_client* create_client(arena& a); void register_client(thread_dispatcher_client* client); bool try_unregister_client(thread_dispatcher_client* client, std::uint64_t aba_epoch, unsigned priority); + bool is_any_client_in_need(); void adjust_job_count_estimate(int delta); void release(bool blocking_terminate); diff --git a/third-party/tbb/src/tbb/thread_dispatcher_client.h b/third-party/tbb/src/tbb/thread_dispatcher_client.h index c93ff31d..f7c199cb 100644 --- a/third-party/tbb/src/tbb/thread_dispatcher_client.h +++ b/third-party/tbb/src/tbb/thread_dispatcher_client.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,6 +32,11 @@ class thread_dispatcher_client : public d1::intrusive_list_node /* Need for list bool try_join() { return my_arena.try_join(); } + + bool is_joinable() { + return my_arena.is_joinable(); + } + void process(thread_data& td) { my_arena.process(td); } diff --git a/third-party/tbb/src/tbb/thread_request_serializer.cpp b/third-party/tbb/src/tbb/thread_request_serializer.cpp index 5973f14c..6019f732 100644 --- a/third-party/tbb/src/tbb/thread_request_serializer.cpp +++ b/third-party/tbb/src/tbb/thread_request_serializer.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -37,8 +37,8 @@ void thread_request_serializer::update(int delta) { if (prev_pending_delta == pending_delta_base) { delta = int(my_pending_delta.exchange(pending_delta_base) & delta_mask) - int(pending_delta_base); mutex_type::scoped_lock lock(my_mutex); - my_total_request += delta; - delta = limit_delta(delta, my_soft_limit, my_total_request); + my_total_request.store(my_total_request.load(std::memory_order_relaxed) + delta, std::memory_order_relaxed); + delta = limit_delta(delta, my_soft_limit, my_total_request.load(std::memory_order_relaxed)); my_thread_dispatcher.adjust_job_count_estimate(delta); } } @@ -46,7 +46,7 @@ void thread_request_serializer::update(int delta) { void thread_request_serializer::set_active_num_workers(int soft_limit) { mutex_type::scoped_lock lock(my_mutex); int delta = soft_limit - my_soft_limit; - delta = limit_delta(delta, my_total_request, soft_limit); + delta = limit_delta(delta, my_total_request.load(std::memory_order_relaxed), soft_limit); my_thread_dispatcher.adjust_job_count_estimate(delta); my_soft_limit = soft_limit; } @@ -109,6 +109,8 @@ void thread_request_serializer_proxy::set_active_num_workers(int soft_limit) { } } +int thread_request_serializer_proxy::num_workers_requested() { return my_serializer.num_workers_requested(); } + void thread_request_serializer_proxy::update(int delta) { my_serializer.update(delta); } void thread_request_serializer_proxy::enable_mandatory_concurrency(mutex_type::scoped_lock& lock) { diff --git a/third-party/tbb/src/tbb/thread_request_serializer.h b/third-party/tbb/src/tbb/thread_request_serializer.h index 261a46d7..9dc9799e 100644 --- a/third-party/tbb/src/tbb/thread_request_serializer.h +++ b/third-party/tbb/src/tbb/thread_request_serializer.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -39,6 +39,7 @@ class thread_request_serializer : public thread_request_observer { public: thread_request_serializer(thread_dispatcher& td, int soft_limit); void set_active_num_workers(int soft_limit); + int num_workers_requested() { return my_total_request.load(std::memory_order_relaxed); } bool is_no_workers_avaliable() { return my_soft_limit == 0; } private: @@ -48,7 +49,7 @@ class thread_request_serializer : public thread_request_observer { thread_dispatcher& my_thread_dispatcher; int my_soft_limit{ 0 }; - int my_total_request{ 0 }; + std::atomic my_total_request{ 0 }; // my_pending_delta is set to pending_delta_base to have ability to hold negative values // consider increase base since thead number will be bigger than 1 << 15 static constexpr std::uint64_t pending_delta_base = 1 << 15; @@ -63,6 +64,7 @@ class thread_request_serializer_proxy : public thread_request_observer { thread_request_serializer_proxy(thread_dispatcher& td, int soft_limit); void register_mandatory_request(int mandatory_delta); void set_active_num_workers(int soft_limit); + int num_workers_requested(); private: void update(int delta) override; diff --git a/third-party/tbb/src/tbb/threading_control.cpp b/third-party/tbb/src/tbb/threading_control.cpp index f253c83d..1ca18378 100644 --- a/third-party/tbb/src/tbb/threading_control.cpp +++ b/third-party/tbb/src/tbb/threading_control.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -164,6 +164,10 @@ void threading_control_impl::adjust_demand(threading_control_client tc_client, i my_permit_manager->adjust_demand(c, mandatory_delta, workers_delta); } +bool threading_control_impl::is_any_other_client_active() { + return my_thread_request_serializer->num_workers_requested() > 0 ? 
my_thread_dispatcher->is_any_client_in_need() : false; +} + thread_control_monitor& threading_control_impl::get_waiting_threads_monitor() { return *my_waiting_threads_monitor; } @@ -389,6 +393,10 @@ void threading_control::adjust_demand(threading_control_client client, int manda my_pimpl->adjust_demand(client, mandatory_delta, workers_delta); } +bool threading_control::is_any_other_client_active() { + return my_pimpl->is_any_other_client_active(); +} + thread_control_monitor& threading_control::get_waiting_threads_monitor() { return my_pimpl->get_waiting_threads_monitor(); } diff --git a/third-party/tbb/src/tbb/threading_control.h b/third-party/tbb/src/tbb/threading_control.h index 55347189..7381b297 100644 --- a/third-party/tbb/src/tbb/threading_control.h +++ b/third-party/tbb/src/tbb/threading_control.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2022-2023 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -69,6 +69,7 @@ class threading_control_impl { unsigned max_num_workers(); void adjust_demand(threading_control_client, int mandatory_delta, int workers_delta); + bool is_any_other_client_active(); thread_control_monitor& get_waiting_threads_monitor(); @@ -116,6 +117,7 @@ class threading_control { static unsigned max_num_workers(); void adjust_demand(threading_control_client client, int mandatory_delta, int workers_delta); + bool is_any_other_client_active(); thread_control_monitor& get_waiting_threads_monitor(); diff --git a/third-party/tbb/src/tbb/tools_api/ittnotify.h b/third-party/tbb/src/tbb/tools_api/ittnotify.h index d15aae26..eb1571dc 100644 --- a/third-party/tbb/src/tbb/tools_api/ittnotify.h +++ b/third-party/tbb/src/tbb/tools_api/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -101,6 +101,11 @@ The same ID may not be reused for different instances, unless a previous # define ITT_OS_FREEBSD 4 #endif /* ITT_OS_FREEBSD */ +#ifndef ITT_OS_OPENBSD +# define ITT_OS_OPENBSD 5 +#endif /* ITT_OS_OPENBSD */ + + #ifndef ITT_OS # if defined WIN32 || defined _WIN32 # define ITT_OS ITT_OS_WIN @@ -108,6 +113,8 @@ The same ID may not be reused for different instances, unless a previous # define ITT_OS ITT_OS_MAC # elif defined( __FreeBSD__ ) # define ITT_OS ITT_OS_FREEBSD +# elif defined( __OpenBSD__ ) +# define ITT_OS ITT_OS_OPENBSD # else # define ITT_OS ITT_OS_LINUX # endif @@ -129,6 +136,10 @@ The same ID may not be reused for different instances, unless a previous # define ITT_PLATFORM_FREEBSD 4 #endif /* ITT_PLATFORM_FREEBSD */ +#ifndef ITT_PLATFORM_OPENBSD +# define ITT_PLATFORM_OPENBSD 5 +#endif /* ITT_PLATFORM_OPENBSD */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN @@ -136,6 +147,8 @@ The same ID may not be reused for 
different instances, unless a previous # define ITT_PLATFORM ITT_PLATFORM_MAC # elif ITT_OS==ITT_OS_FREEBSD # define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# elif ITT_OS==ITT_OS_OPENBSD +# define ITT_PLATFORM ITT_PLATFORM_OPENBSD # else # define ITT_PLATFORM ITT_PLATFORM_POSIX # endif @@ -305,7 +318,7 @@ extern "C" { * only pauses tracing and analyzing memory access. * It does not pause tracing or analyzing threading APIs. * . - * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * Intel(R) VTune(TM) Profiler: * - Does continue to record when new threads are started. * . * - Other effects: diff --git a/third-party/tbb/src/tbb/tools_api/ittnotify_config.h b/third-party/tbb/src/tbb/tools_api/ittnotify_config.h index 44edfd67..001d42e0 100644 --- a/third-party/tbb/src/tbb/tools_api/ittnotify_config.h +++ b/third-party/tbb/src/tbb/tools_api/ittnotify_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -34,6 +34,10 @@ # define ITT_OS_FREEBSD 4 #endif /* ITT_OS_FREEBSD */ +#ifndef ITT_OS_OPENBSD +# define ITT_OS_OPENBSD 5 +#endif /* ITT_OS_OPENBSD */ + #ifndef ITT_OS # if defined WIN32 || defined _WIN32 # define ITT_OS ITT_OS_WIN @@ -41,6 +45,8 @@ # define ITT_OS ITT_OS_MAC # elif defined( __FreeBSD__ ) # define ITT_OS ITT_OS_FREEBSD +# elif defined( __OpenBSD__ ) +# define ITT_OS ITT_OS_OPENBSD # else # define ITT_OS ITT_OS_LINUX # endif @@ -62,6 +68,10 @@ # define ITT_PLATFORM_FREEBSD 4 #endif /* ITT_PLATFORM_FREEBSD */ +#ifndef ITT_PLATFORM_OPENBSD +# define ITT_PLATFORM_OPENBSD 5 +#endif /* ITT_PLATFORM_OPENBSD */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN @@ -69,6 +79,8 @@ # define ITT_PLATFORM ITT_PLATFORM_MAC # elif ITT_OS==ITT_OS_FREEBSD # define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# elif ITT_OS==ITT_OS_OPENBSD +# define ITT_PLATFORM ITT_PLATFORM_OPENBSD # else # define ITT_PLATFORM ITT_PLATFORM_POSIX # endif @@ -235,7 +247,7 @@ #define API_VERSION_BUILD 20230630 #ifndef API_VERSION_NUM -#define API_VERSION_NUM 3.24.2 +#define API_VERSION_NUM 3.24.4 #endif /* API_VERSION_NUM */ #define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ @@ -634,7 +646,7 @@ typedef struct ___itt_global h->nameA = NULL; \ h->nameW = name ? _wcsdup(name) : NULL; \ h->domainA = NULL; \ - h->domainW = name ? _wcsdup(domain) : NULL; \ + h->domainW = domain ? 
_wcsdup(domain) : NULL; \ h->type = type; \ h->index = 0; \ h->next = NULL; \ diff --git a/third-party/tbb/src/tbb/tools_api/ittnotify_static.c b/third-party/tbb/src/tbb/tools_api/ittnotify_static.c index ab396d20..c3a53bf0 100644 --- a/third-party/tbb/src/tbb/tools_api/ittnotify_static.c +++ b/third-party/tbb/src/tbb/tools_api/ittnotify_static.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -81,7 +81,7 @@ static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n"; #if ITT_OS==ITT_OS_WIN static const char* ittnotify_lib_name = "libittnotify.dll"; -#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD +#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD|| ITT_OS==ITT_OS_OPENBSD static const char* ittnotify_lib_name = "libittnotify.so"; #elif ITT_OS==ITT_OS_MAC static const char* ittnotify_lib_name = "libittnotify.dylib"; diff --git a/third-party/tbb/src/tbb/tools_api/legacy/ittnotify.h b/third-party/tbb/src/tbb/tools_api/legacy/ittnotify.h index b5999c2a..837bc480 100644 --- a/third-party/tbb/src/tbb/tools_api/legacy/ittnotify.h +++ b/third-party/tbb/src/tbb/tools_api/legacy/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -245,7 +245,7 @@ extern "C" { * only pauses tracing and analyzing memory access. * It does not pause tracing or analyzing threading APIs. * . - * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Intel(R) VTune(TM) Profiler: * - Does continue to record when new threads are started. * . 
* - Other effects: diff --git a/third-party/tbb/src/tbb/waiters.h b/third-party/tbb/src/tbb/waiters.h index e2aa4abc..8ed431f8 100644 --- a/third-party/tbb/src/tbb/waiters.h +++ b/third-party/tbb/src/tbb/waiters.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -58,6 +58,24 @@ class outermost_worker_waiter : public waiter_base { __TBB_ASSERT(t == nullptr, nullptr); if (is_worker_should_leave(slot)) { + if (!governor::hybrid_cpu()) { + static constexpr std::chrono::microseconds worker_wait_leave_duration(1000); + static_assert(worker_wait_leave_duration > std::chrono::steady_clock::duration(1), "Clock resolution is not enough for measured interval."); + + for (auto t1 = std::chrono::steady_clock::now(), t2 = t1; + std::chrono::duration_cast(t2 - t1) < worker_wait_leave_duration; + t2 = std::chrono::steady_clock::now()) + { + if (!my_arena.is_empty() && !my_arena.is_recall_requested()) { + return true; + } + + if (my_arena.my_threading_control->is_any_other_client_active()) { + break; + } + d0::yield(); + } + } // Leave dispatch loop return false; } @@ -114,6 +132,7 @@ class sleep_waiter : public waiter_base { void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) { my_arena.get_waiting_threads_monitor().wait(wakeup_condition, market_context{uniq_tag, &my_arena}); + reset_wait(); } }; @@ -139,7 +158,6 @@ class external_waiter : public sleep_waiter { auto wakeup_condition = [&] { return !my_arena.is_empty() || !my_wait_ctx.continue_execution(); }; sleep(std::uintptr_t(&my_wait_ctx), wakeup_condition); - my_backoff.reset_wait(); } d1::wait_context* wait_ctx() { @@ -176,11 +194,6 @@ class coroutine_waiter : public sleep_waiter { auto wakeup_condition = [&] { return !my_arena.is_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); }; sleep(std::uintptr_t(sp), 
wakeup_condition); - my_backoff.reset_wait(); - } - - void reset_wait() { - my_backoff.reset_wait(); } d1::wait_context* wait_ctx() { diff --git a/third-party/tbb/src/tbbbind/CMakeLists.txt b/third-party/tbb/src/tbbbind/CMakeLists.txt index 24cd3e5d..993dc8b8 100644 --- a/third-party/tbb/src/tbbbind/CMakeLists.txt +++ b/third-party/tbb/src/tbbbind/CMakeLists.txt @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -if (DEFINED CMAKE_SKIP_BUILD_RPATH) - set(CMAKE_SKIP_BUILD_RPATH_OLD_VALUE ${CMAKE_SKIP_BUILD_RPATH}) -endif() set(CMAKE_SKIP_BUILD_RPATH TRUE) function(tbbbind_build TBBBIND_NAME REQUIRED_HWLOC_TARGET) @@ -106,10 +103,3 @@ else() tbbbind_build(tbbbind_2_5 HWLOC::hwloc_2_5 ) endif() - -if (DEFINED CMAKE_SKIP_BUILD_RPATH_OLD_VALUE) - set(CMAKE_SKIP_BUILD_RPATH ${CMAKE_SKIP_BUILD_RPATH_OLD_VALUE}) - unset(CMAKE_SKIP_BUILD_RPATH_OLD_VALUE) -else() - unset(CMAKE_SKIP_BUILD_RPATH) -endif() diff --git a/third-party/tbb/src/tbbbind/tbb_bind.rc b/third-party/tbb/src/tbbbind/tbb_bind.rc index bc060353..2d2b806e 100644 --- a/third-party/tbb/src/tbbbind/tbb_bind.rc +++ b/third-party/tbb/src/tbbbind/tbb_bind.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2023 Intel Corporation +// Copyright (c) 2005-2024 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ BEGIN VALUE "CompanyName", "Intel Corporation\0" VALUE "FileDescription", "oneAPI Threading Building Blocks (oneTBB) library\0" VALUE "FileVersion", TBB_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2005-2023 Intel Corporation. All Rights Reserved.\0" + VALUE "LegalCopyright", "Copyright 2005-2024 Intel Corporation. 
All Rights Reserved.\0" VALUE "LegalTrademarks", "\0" #ifndef TBB_USE_DEBUG VALUE "OriginalFilename", "tbbbind.dll\0" diff --git a/third-party/tbb/src/tbbmalloc/CMakeLists.txt b/third-party/tbb/src/tbbmalloc/CMakeLists.txt index 0386daa3..76044fce 100644 --- a/third-party/tbb/src/tbbmalloc/CMakeLists.txt +++ b/third-party/tbb/src/tbbmalloc/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -109,5 +109,15 @@ target_link_libraries(tbbmalloc ${TBB_COMMON_LINK_LIBS} ) -tbb_install_target(tbbmalloc) +if(TBB_BUILD_APPLE_FRAMEWORKS) + set_target_properties(tbbmalloc PROPERTIES + FRAMEWORK TRUE + FRAMEWORK_VERSION ${TBBMALLOC_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER com.intel.tbbmalloc + MACOSX_FRAMEWORK_IDENTIFIER com.intel.tbbmalloc + MACOSX_FRAMEWORK_BUNDLE_VERSION ${TBBMALLOC_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} + MACOSX_FRAMEWORK_SHORT_VERSION_STRING ${TBBMALLOC_BINARY_VERSION} + ) +endif() +tbb_install_target(tbbmalloc) diff --git a/third-party/tbb/src/tbbmalloc/TypeDefinitions.h b/third-party/tbb/src/tbbmalloc/TypeDefinitions.h index 81149166..bfadf61d 100644 --- a/third-party/tbb/src/tbbmalloc/TypeDefinitions.h +++ b/third-party/tbb/src/tbbmalloc/TypeDefinitions.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -25,7 +25,7 @@ # define __ARCH_ipf 1 # elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support # define __ARCH_x86_32 1 -# elif defined(_M_ARM) || defined(_M_ARM64) +# elif defined(_M_ARM) || defined(_M_ARM64) || defined(__aarch64__) // the latter for MinGW support # define __ARCH_other 1 # else # error Unknown processor architecture for Windows diff --git a/third-party/tbb/src/tbbmalloc/frontend.cpp b/third-party/tbb/src/tbbmalloc/frontend.cpp index c657d804..77f9d659 100644 --- a/third-party/tbb/src/tbbmalloc/frontend.cpp +++ b/third-party/tbb/src/tbbmalloc/frontend.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -1521,7 +1521,7 @@ bool Block::readyToShare() { MallocMutex::scoped_lock scoped_cs(publicFreeListLock); if ( (oldVal=publicFreeList)==nullptr ) - (intptr_t&)(publicFreeList) = UNUSABLE; + publicFreeList = reinterpret_cast(UNUSABLE); } #endif return oldVal==nullptr; diff --git a/third-party/tbb/src/tbbmalloc/tbbmalloc.rc b/third-party/tbb/src/tbbmalloc/tbbmalloc.rc index 77e87ff5..2821adda 100644 --- a/third-party/tbb/src/tbbmalloc/tbbmalloc.rc +++ b/third-party/tbb/src/tbbmalloc/tbbmalloc.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2023 Intel Corporation +// Copyright (c) 2005-2024 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ BEGIN VALUE "CompanyName", "Intel Corporation\0" VALUE "FileDescription", "oneAPI Threading Building Blocks (oneTBB) library\0" VALUE "FileVersion", TBB_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2005-2023 Intel Corporation. All Rights Reserved.\0" + VALUE "LegalCopyright", "Copyright 2005-2024 Intel Corporation. 
All Rights Reserved.\0" VALUE "LegalTrademarks", "\0" #ifndef TBB_USE_DEBUG VALUE "OriginalFilename", "tbbmalloc.dll\0" diff --git a/third-party/tbb/src/tbbmalloc/tbbmalloc_internal.h b/third-party/tbb/src/tbbmalloc/tbbmalloc_internal.h index c81dc060..44fa47aa 100644 --- a/third-party/tbb/src/tbbmalloc/tbbmalloc_internal.h +++ b/third-party/tbb/src/tbbmalloc/tbbmalloc_internal.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -102,7 +102,11 @@ void suppress_unused_warning( const T& ) {} /* * Default huge page size */ +#if defined __loongarch64 +static const size_t HUGE_PAGE_SIZE = 32 * 1024 * 1024; +#else static const size_t HUGE_PAGE_SIZE = 2 * 1024 * 1024; +#endif /********** End of global default constants *********/ diff --git a/third-party/tbb/src/tbbmalloc_proxy/CMakeLists.txt b/third-party/tbb/src/tbbmalloc_proxy/CMakeLists.txt index 5c23f15d..554ddc85 100644 --- a/third-party/tbb/src/tbbmalloc_proxy/CMakeLists.txt +++ b/third-party/tbb/src/tbbmalloc_proxy/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -90,4 +90,14 @@ target_link_libraries(tbbmalloc_proxy ${TBB_COMMON_LINK_LIBS} ) +if(TBB_BUILD_APPLE_FRAMEWORKS) + set_target_properties(tbbmalloc_proxy PROPERTIES + FRAMEWORK TRUE + FRAMEWORK_VERSION ${TBBMALLOC_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER com.intel.tbbmalloc-proxy + MACOSX_FRAMEWORK_IDENTIFIER com.intel.tbbmalloc-proxy + MACOSX_FRAMEWORK_BUNDLE_VERSION ${TBBMALLOC_BINARY_VERSION}.${TBB_BINARY_MINOR_VERSION} + MACOSX_FRAMEWORK_SHORT_VERSION_STRING ${TBBMALLOC_BINARY_VERSION}) +endif() + tbb_install_target(tbbmalloc_proxy) diff --git a/third-party/tbb/src/tbbmalloc_proxy/tbbmalloc_proxy.rc b/third-party/tbb/src/tbbmalloc_proxy/tbbmalloc_proxy.rc index 20b3b480..1884b119 100644 --- a/third-party/tbb/src/tbbmalloc_proxy/tbbmalloc_proxy.rc +++ b/third-party/tbb/src/tbbmalloc_proxy/tbbmalloc_proxy.rc @@ -1,4 +1,4 @@ -// Copyright (c) 2005-2023 Intel Corporation +// Copyright (c) 2005-2024 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -54,7 +54,7 @@ BEGIN VALUE "CompanyName", "Intel Corporation\0" VALUE "FileDescription", "oneAPI Threading Building Blocks (oneTBB) library\0" VALUE "FileVersion", TBB_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2005-2023 Intel Corporation. All Rights Reserved.\0" + VALUE "LegalCopyright", "Copyright 2005-2024 Intel Corporation. 
All Rights Reserved.\0" VALUE "LegalTrademarks", "\0" #ifndef TBB_USE_DEBUG VALUE "OriginalFilename", "tbbmalloc_proxy.dll\0" diff --git a/third-party/tbb/test/CMakeLists.txt b/third-party/tbb/test/CMakeLists.txt index 0e0b3966..cfde681b 100644 --- a/third-party/tbb/test/CMakeLists.txt +++ b/third-party/tbb/test/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023 Intel Corporation +# Copyright (c) 2020-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -29,6 +29,9 @@ function(tbb_add_test) add_executable(${_tbb_test_TARGET_NAME} ${_tbb_test_SUBDIR}/${_tbb_test_NAME}.cpp) target_include_directories(${_tbb_test_TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}) + # cmake>=3.4 no longer adds flags to export symbols from executables (CMP0065) + set_property(TARGET ${_tbb_test_TARGET_NAME} PROPERTY ENABLE_EXPORTS TRUE) + target_compile_options(${_tbb_test_TARGET_NAME} PRIVATE ${TBB_CXX_STD_FLAG} @@ -40,6 +43,10 @@ function(tbb_add_test) ${TBB_COMMON_COMPILE_FLAGS} ) + if (TBB_BUILD_APPLE_FRAMEWORKS) + add_compile_definitions(TBB_USE_APPLE_FRAMEWORKS) + endif() + if (ANDROID_PLATFORM) # Expand the linker rpath by the CMAKE_LIBRARY_OUTPUT_DIRECTORY path since clang compiler from Android SDK # doesn't respect the -L flag. 
@@ -558,7 +565,7 @@ if (TARGET TBB::tbb) target_include_directories(test_implicit_linkage_on_windows PRIVATE $) set_target_properties(test_implicit_linkage_on_windows PROPERTIES - LINK_OPTIONS /LIBPATH:$) + LINK_OPTIONS LINKER:/LIBPATH:$) add_dependencies(test_implicit_linkage_on_windows TBB::tbb) endif() endif() @@ -590,39 +597,39 @@ if (TARGET TBB::tbbmalloc) endif() # ---------------------------------------------------------------------------------------- # Whitebox testing - - add_executable(test_malloc_whitebox tbbmalloc/test_malloc_whitebox.cpp) - - target_include_directories(test_malloc_whitebox - PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/../include - ${CMAKE_CURRENT_SOURCE_DIR}/.. - ${CMAKE_CURRENT_SOURCE_DIR}) - target_compile_definitions(test_malloc_whitebox PRIVATE __TBBMALLOC_BUILD) - target_compile_options(test_malloc_whitebox - PRIVATE - ${TBB_CXX_STD_FLAG} - ${TBB_WARNING_SUPPRESS} - ${TBB_TEST_COMPILE_FLAGS} - ${TBB_COMMON_COMPILE_FLAGS} - ${TBBMALLOC_LIB_COMPILE_FLAGS} - ) - if (ANDROID_PLATFORM) - add_test(NAME test_malloc_whitebox - COMMAND ${CMAKE_COMMAND} - -DBINARIES_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY} - -DTEST_NAME=test_malloc_whitebox - -P ${PROJECT_SOURCE_DIR}/cmake/android/test_launcher.cmake) - else() - add_test(NAME test_malloc_whitebox COMMAND test_malloc_whitebox --force-colors=1) - endif() - if (COMMAND target_link_options) - target_link_options(test_malloc_whitebox PRIVATE ${TBB_COMMON_LINK_FLAGS}) - else() - target_link_libraries(test_malloc_whitebox PRIVATE ${TBB_COMMON_LINK_FLAGS}) + if (NOT TBB_EMSCRIPTEN) + add_executable(test_malloc_whitebox tbbmalloc/test_malloc_whitebox.cpp) + + target_include_directories(test_malloc_whitebox + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/../include + ${CMAKE_CURRENT_SOURCE_DIR}/.. 
+ ${CMAKE_CURRENT_SOURCE_DIR}) + target_compile_definitions(test_malloc_whitebox PRIVATE __TBBMALLOC_BUILD) + target_compile_options(test_malloc_whitebox + PRIVATE + ${TBB_CXX_STD_FLAG} + ${TBB_WARNING_SUPPRESS} + ${TBB_TEST_COMPILE_FLAGS} + ${TBB_COMMON_COMPILE_FLAGS} + ${TBBMALLOC_LIB_COMPILE_FLAGS} + ) + if (ANDROID_PLATFORM) + add_test(NAME test_malloc_whitebox + COMMAND ${CMAKE_COMMAND} + -DBINARIES_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY} + -DTEST_NAME=test_malloc_whitebox + -P ${PROJECT_SOURCE_DIR}/cmake/android/test_launcher.cmake) + else() + add_test(NAME test_malloc_whitebox COMMAND test_malloc_whitebox --force-colors=1) + endif() + if (COMMAND target_link_options) + target_link_options(test_malloc_whitebox PRIVATE ${TBB_COMMON_LINK_FLAGS}) + else() + target_link_libraries(test_malloc_whitebox PRIVATE ${TBB_COMMON_LINK_FLAGS}) + endif() + target_link_libraries(test_malloc_whitebox PRIVATE Threads::Threads ${TBB_COMMON_LINK_LIBS}) endif() - target_link_libraries(test_malloc_whitebox PRIVATE Threads::Threads ${TBB_COMMON_LINK_LIBS}) - # ------------------------------------------------------------------------------------------ # Define TBB malloc conformance tests diff --git a/third-party/tbb/test/common/utils_concurrency_limit.h b/third-party/tbb/test/common/utils_concurrency_limit.h index 4b1e8d20..9d0b3c77 100644 --- a/third-party/tbb/test/common/utils_concurrency_limit.h +++ b/third-party/tbb/test/common/utils_concurrency_limit.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2022 Intel Corporation + Copyright (c) 2020-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -287,27 +287,44 @@ bool can_change_thread_priority() { return false; } -void increase_thread_priority() { #if __unix__ - pthread_t this_thread = pthread_self(); - sched_param params; - params.sched_priority = sched_get_priority_max(SCHED_FIFO); - ASSERT(params.sched_priority != -1, nullptr); - int err = pthread_setschedparam(this_thread, SCHED_FIFO, ¶ms); - ASSERT(err == 0, "Can not change thread priority."); -#endif -} +class increased_priority_guard { +public: + increased_priority_guard() : m_backup(get_current_schedparam()) { + increase_thread_priority(); + } -void decrease_thread_priority() { -#if __unix__ - pthread_t this_thread = pthread_self(); - sched_param params; - params.sched_priority = sched_get_priority_min(SCHED_FIFO); - ASSERT(params.sched_priority != -1, nullptr); - int err = pthread_setschedparam(this_thread, SCHED_FIFO, ¶ms); - ASSERT(err == 0, "Can not change thread priority."); + ~increased_priority_guard() { + // restore priority on destruction + pthread_t this_thread = pthread_self(); + int err = pthread_setschedparam(this_thread, + /*policy*/ m_backup.first, /*sched_param*/ &m_backup.second); + ASSERT(err == 0, nullptr); + } +private: + std::pair get_current_schedparam() { + pthread_t this_thread = pthread_self(); + sched_param params; + int policy = 0; + int err = pthread_getschedparam(this_thread, &policy, ¶ms); + ASSERT(err == 0, nullptr); + return std::make_pair(policy, params); + } + + void increase_thread_priority() { + pthread_t this_thread = pthread_self(); + sched_param params; + params.sched_priority = sched_get_priority_max(SCHED_FIFO); + ASSERT(params.sched_priority != -1, nullptr); + int err = pthread_setschedparam(this_thread, SCHED_FIFO, ¶ms); + ASSERT(err == 0, "Can not change thread priority."); + } + + std::pair m_backup; +}; +#else + class increased_priority_guard{}; #endif -} } // namespace utils diff --git a/third-party/tbb/test/common/utils_dynamic_libs.h b/third-party/tbb/test/common/utils_dynamic_libs.h index 
c84beac7..5e5365fc 100644 --- a/third-party/tbb/test/common/utils_dynamic_libs.h +++ b/third-party/tbb/test/common/utils_dynamic_libs.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,9 +46,17 @@ namespace utils { #endif #define EXT ".dll" #else +#if TBB_USE_APPLE_FRAMEWORKS +#define PREFIX // When built as Apple* Framework, the binary has no lib prefix +#else #define PREFIX "lib" +#endif #if __APPLE__ +#if TBB_USE_APPLE_FRAMEWORKS +#define EXT // When built as Apple* Framework, the binary has no extension +#else #define EXT ".dylib" +#endif // Android SDK build system does not support .so file name versioning #elif __FreeBSD__ || __NetBSD__ || __sun || _AIX || __ANDROID__ #define EXT ".so" @@ -58,10 +66,15 @@ namespace utils { #error Unknown OS #endif #endif +#if TBB_USE_APPLE_FRAMEWORKS +#define MALLOCFRAMEWORK "tbbmalloc.framework/" +#else +#define MALLOCFRAMEWORK +#endif // Form the names of the TBB memory allocator binaries. 
-#define MALLOCLIB_NAME1 PREFIX "tbbmalloc" SUFFIX1 EXT -#define MALLOCLIB_NAME2 PREFIX "tbbmalloc" SUFFIX2 EXT +#define MALLOCLIB_NAME1 MALLOCFRAMEWORK PREFIX "tbbmalloc" SUFFIX1 EXT +#define MALLOCLIB_NAME2 MALLOCFRAMEWORK PREFIX "tbbmalloc" SUFFIX2 EXT #if _WIN32 || _WIN64 using LIBRARY_HANDLE = HMODULE; diff --git a/third-party/tbb/test/conformance/conformance_blocked_rangeNd.cpp b/third-party/tbb/test/conformance/conformance_blocked_rangeNd.cpp index de54169c..52faac52 100644 --- a/third-party/tbb/test/conformance/conformance_blocked_rangeNd.cpp +++ b/third-party/tbb/test/conformance/conformance_blocked_rangeNd.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2017-2021 Intel Corporation + Copyright (c) 2017-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -245,6 +245,7 @@ TEST_CASE("Serial test") { SerialTest(); } +#if !EMSCRIPTEN //! Testing blocked_rangeNd interface with parallel_for //! \brief \ref requirement TEST_CASE("Parallel test") { @@ -253,6 +254,7 @@ TEST_CASE("Parallel test") { ParallelTest(); } } +#endif //! Testing blocked_rangeNd with proportional splitting //! \brief \ref interface \ref requirement diff --git a/third-party/tbb/test/conformance/conformance_parallel_for.cpp b/third-party/tbb/test/conformance/conformance_parallel_for.cpp index 44903f06..463ea526 100644 --- a/third-party/tbb/test/conformance/conformance_parallel_for.cpp +++ b/third-party/tbb/test/conformance/conformance_parallel_for.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -399,7 +399,9 @@ TEST_CASE("Flog test") { Flog(); Flog(); Flog(); +#if !EMSCRIPTEN Flog(); +#endif } //! 
Testing parallel for with different types and step diff --git a/third-party/tbb/test/conformance/conformance_parallel_for_each.cpp b/third-party/tbb/test/conformance/conformance_parallel_for_each.cpp index ad8ee672..e36a2803 100644 --- a/third-party/tbb/test/conformance/conformance_parallel_for_each.cpp +++ b/third-party/tbb/test/conformance/conformance_parallel_for_each.cpp @@ -102,10 +102,8 @@ class ForEachInvokeItem { void do_action_and_feed(oneapi::tbb::feeder& feeder) const { CHECK_MESSAGE(change_vector.size() % 2 == 0, "incorrect test setup"); std::size_t shift = change_vector.size() / 2; - std::cout << "Process " << real_value << std::endl; ++change_vector[real_value]; if (real_value < shift) { - std::cout << "Add " << real_value + shift << std::endl; feeder.add(ForEachInvokeItem(real_value + shift, change_vector)); } } diff --git a/third-party/tbb/test/conformance/conformance_parallel_reduce.cpp b/third-party/tbb/test/conformance/conformance_parallel_reduce.cpp index cf3aee9b..0214bfd9 100644 --- a/third-party/tbb/test/conformance/conformance_parallel_reduce.cpp +++ b/third-party/tbb/test/conformance/conformance_parallel_reduce.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include "common/test_invoke.h" #include "../tbb/test_partitioner.h" +#include //! \file conformance_parallel_reduce.cpp //! 
\brief Test for [algorithms.parallel_reduce algorithms.parallel_deterministic_reduce] specification @@ -56,6 +57,59 @@ struct ReduceBody { } }; +template +class MoveOnlyWrapper { +public: + MoveOnlyWrapper() = default; + MoveOnlyWrapper(const T& obj) : my_obj(obj) {} + + MoveOnlyWrapper(MoveOnlyWrapper&&) = default; + MoveOnlyWrapper& operator=(MoveOnlyWrapper&&) = default; + + MoveOnlyWrapper(const MoveOnlyWrapper&) = delete; + MoveOnlyWrapper& operator=(const MoveOnlyWrapper&) = delete; + + bool operator==(const MoveOnlyWrapper& other) const { return my_obj == other.my_obj; } +private: + T my_obj; +}; // class MoveOnlyWrapper + +// The container wrapper that is copyable but the copy constructor fails if the source container is non-empty +// If such an empty container is provided as an identity into parallel reduce algorithm with rvalue-friendly body, +// it should only call the copy constructor while broadcasting the identity element into the leafs +// and the identity element is an empty container for the further test +template +class EmptyCopyList { +public: + EmptyCopyList() = default; + + EmptyCopyList(EmptyCopyList&&) = default; + EmptyCopyList& operator=(EmptyCopyList&&) = default; + + EmptyCopyList(const EmptyCopyList& other) { + REQUIRE_MESSAGE(other.my_list.empty(), "reduce copied non-identity list"); + } + EmptyCopyList& operator=(const EmptyCopyList& other) { + REQUIRE_MESSAGE(other.my_list.empty(), "reduce copied non-identity list"); + return *this; + } + + typename std::list::iterator insert(typename std::list::const_iterator pos, T&& item) { + return my_list.insert(pos, std::move(item)); + } + + void splice(typename std::list::const_iterator pos, EmptyCopyList&& other) { + my_list.splice(pos, std::move(other.my_list)); + } + + typename std::list::const_iterator end() const { return my_list.end(); } + + bool operator==(const EmptyCopyList& other) const { return my_list == other.my_list; } + +private: + std::list my_list; +}; // class EmptyCopyList + 
template void TestDeterministicReductionFor() { const int N = 1000; @@ -174,3 +228,109 @@ TEST_CASE("parallel_[deterministic_]reduce and std::invoke") { } #endif + +template +void test_vector_of_lists_rvalue_reduce_basic(const Runner& runner, PartitionerContext&&... args) { + constexpr std::size_t n_vectors = 10000; + + using inner_type = MoveOnlyWrapper; + using list_type = EmptyCopyList; + using vector_of_lists_type = std::vector; + + vector_of_lists_type vector_of_lists; + + vector_of_lists.reserve(n_vectors); + for (std::size_t i = 0; i < n_vectors; ++i) { + list_type list; + + list.insert(list.end(), inner_type{1}); + list.insert(list.end(), inner_type{2}); + list.insert(list.end(), inner_type{3}); + list.insert(list.end(), inner_type{4}); + list.insert(list.end(), inner_type{5}); + vector_of_lists.emplace_back(std::move(list)); + } + + oneapi::tbb::blocked_range range(0, n_vectors, n_vectors * 2); + + auto reduce_body = [&](const decltype(range)& range_obj, list_type&& x) { + list_type new_list = std::move(x); + + for (std::size_t index = range_obj.begin(); index != range_obj.end(); ++index) { + new_list.splice(new_list.end(), std::move(vector_of_lists[index])); + } + return new_list; + }; + + auto join_body = [&](list_type&& x, list_type&& y) { + list_type new_list = std::move(x); + + new_list.splice(new_list.end(), std::move(y)); + return new_list; + }; + + list_type result = runner(range, list_type{}, reduce_body, join_body, std::forward(args)...); + + list_type expected_result; + + for (std::size_t i = 0; i < n_vectors; ++i) { + expected_result.insert(expected_result.end(), inner_type{1}); + expected_result.insert(expected_result.end(), inner_type{2}); + expected_result.insert(expected_result.end(), inner_type{3}); + expected_result.insert(expected_result.end(), inner_type{4}); + expected_result.insert(expected_result.end(), inner_type{5}); + } + + REQUIRE_MESSAGE(expected_result == result, "Incorrect reduce result"); +} + +struct ReduceRunner { + 
template + auto operator()(Args&&... args) const -> decltype(oneapi::tbb::parallel_reduce(std::forward(args)...)) { + return oneapi::tbb::parallel_reduce(std::forward(args)...); + } +}; + +struct DeterministicReduceRunner { + template + auto operator()(Args&&... args) const -> decltype(oneapi::tbb::parallel_deterministic_reduce(std::forward(args)...)) { + return oneapi::tbb::parallel_deterministic_reduce(std::forward(args)...); + } +}; + +void test_vector_of_lists_rvalue_reduce() { + ReduceRunner runner; + oneapi::tbb::affinity_partitioner af_partitioner; + oneapi::tbb::task_group_context context; + + test_vector_of_lists_rvalue_reduce_basic(runner); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::auto_partitioner{}); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::simple_partitioner{}); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::static_partitioner{}); + test_vector_of_lists_rvalue_reduce_basic(runner, af_partitioner); + + test_vector_of_lists_rvalue_reduce_basic(runner, context); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::auto_partitioner{}, context); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::simple_partitioner{}, context); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::static_partitioner{}, context); + test_vector_of_lists_rvalue_reduce_basic(runner, af_partitioner, context); +} + +void test_vector_of_lists_rvalue_deterministic_reduce() { + DeterministicReduceRunner runner; + oneapi::tbb::task_group_context context; + + test_vector_of_lists_rvalue_reduce_basic(runner); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::simple_partitioner{}); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::static_partitioner{}); + + test_vector_of_lists_rvalue_reduce_basic(runner, context); + test_vector_of_lists_rvalue_reduce_basic(runner, oneapi::tbb::simple_partitioner{}, context); + test_vector_of_lists_rvalue_reduce_basic(runner, 
oneapi::tbb::static_partitioner{}, context); +} + +//! \brief \ref interface \ref requirement +TEST_CASE("test rvalue optimization") { + test_vector_of_lists_rvalue_reduce(); + test_vector_of_lists_rvalue_deterministic_reduce(); +} diff --git a/third-party/tbb/test/tbb/test_collaborative_call_once.cpp b/third-party/tbb/test/tbb/test_collaborative_call_once.cpp index d8ee09fd..11a04a10 100644 --- a/third-party/tbb/test/tbb/test_collaborative_call_once.cpp +++ b/third-party/tbb/test/tbb/test_collaborative_call_once.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2022 Intel Corporation + Copyright (c) 2022-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -206,6 +206,7 @@ TEST_CASE("only calls once - move only argument") { } } +#if !EMSCRIPTEN //! Stress test for functor to be called only once //! \brief \ref interface \ref requirement \ref stress TEST_CASE("only calls once - stress test") { @@ -246,7 +247,7 @@ TEST_CASE("only calls once - stress test") { }); } } - +#endif #if TBB_USE_EXCEPTIONS //! Test for collaborative_call_once exception handling @@ -324,6 +325,7 @@ TEST_CASE("handles exceptions - stress test") { #endif +#if !EMSCRIPTEN //! Test for multiple help from moonlighting threads //! \brief \ref interface \ref requirement TEST_CASE("multiple help") { @@ -341,6 +343,7 @@ TEST_CASE("multiple help") { }); }); } +#endif //! Test for collaborative work from different arenas //! 
\brief \ref interface \ref requirement diff --git a/third-party/tbb/test/tbb/test_eh_algorithms.cpp b/third-party/tbb/test/tbb/test_eh_algorithms.cpp index 75c0381d..7a2b59b4 100644 --- a/third-party/tbb/test/tbb/test_eh_algorithms.cpp +++ b/third-party/tbb/test/tbb/test_eh_algorithms.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -401,7 +401,7 @@ TEST_CASE("parallel_for and parallel_reduce exception handling test #0") { } } } - +#if !EMSCRIPTEN //! Testing parallel_for and parallel_reduce exception handling //! \brief \ref error_guessing TEST_CASE("parallel_for and parallel_reduce exception handling test #1") { @@ -486,8 +486,8 @@ TEST_CASE("parallel_for and parallel_reduce exception handling test #4") { } } +#endif #endif /* TBB_USE_EXCEPTIONS */ - class ParForBodyToCancel { public: void operator()( const range_type& ) const { @@ -698,6 +698,7 @@ TEST_CASE("parallel_for and parallel_reduce cancellation test #1") { } } +#if !EMSCRIPTEN //! Testing parallel_for and parallel_reduce cancellation //! \brief \ref error_guessing TEST_CASE("parallel_for and parallel_reduce cancellation test #2") { @@ -718,6 +719,7 @@ TEST_CASE("parallel_for and parallel_reduce cancellation test #2") { } } } +#endif //! Testing parallel_for and parallel_reduce cancellation //! \brief \ref error_guessing @@ -1033,6 +1035,7 @@ void Test5_parallel_for_each () { } } // void Test5_parallel_for_each () +#if !EMSCRIPTEN //! Testing parallel_for_each exception handling //! \brief \ref error_guessing TEST_CASE("parallel_for_each exception handling test #1") { @@ -1053,6 +1056,7 @@ TEST_CASE("parallel_for_each exception handling test #1") { } } } +#endif //! Testing parallel_for_each exception handling //! 
\brief \ref error_guessing @@ -1075,6 +1079,7 @@ TEST_CASE("parallel_for_each exception handling test #2") { } } +#if !EMSCRIPTEN //! Testing parallel_for_each exception handling //! \brief \ref error_guessing TEST_CASE("parallel_for_each exception handling test #3") { @@ -1095,6 +1100,7 @@ TEST_CASE("parallel_for_each exception handling test #3") { } } } +#endif //! Testing parallel_for_each exception handling //! \brief \ref error_guessing @@ -1117,6 +1123,7 @@ TEST_CASE("parallel_for_each exception handling test #4") { } } +#if !EMSCRIPTEN //! Testing parallel_for_each exception handling //! \brief \ref error_guessing TEST_CASE("parallel_for_each exception handling test #5") { @@ -1139,7 +1146,7 @@ TEST_CASE("parallel_for_each exception handling test #5") { } } } - +#endif #endif /* TBB_USE_EXCEPTIONS */ class ParForEachBodyToCancel { @@ -1217,6 +1224,7 @@ void TestCancelation2_parallel_for_each () { RunCancellationTest, Cancellator2>(); } +#if !EMSCRIPTEN //! Testing parallel_for_each cancellation test //! \brief \ref error_guessing TEST_CASE("parallel_for_each cancellation test #1") { @@ -1257,6 +1265,7 @@ TEST_CASE("parallel_for_each cancellation test #2") { } } } +#endif //////////////////////////////////////////////////////////////////////////////// // Tests for tbb::parallel_pipeline @@ -1608,6 +1617,7 @@ void TestWithDifferentFiltersAndConcurrency() { #endif } +#if !EMSCRIPTEN //! Testing parallel_pipeline exception handling //! \brief \ref error_guessing TEST_CASE("parallel_pipeline exception handling test #1") { @@ -1631,7 +1641,7 @@ TEST_CASE("parallel_pipeline exception handling test #3") { TEST_CASE("parallel_pipeline exception handling test #4") { TestWithDifferentFiltersAndConcurrency(); } - +#endif #endif /* TBB_USE_EXCEPTIONS */ class FilterToCancel { @@ -1727,6 +1737,7 @@ TEST_CASE("parallel_pipeline cancellation test #1") { } } +#if !EMSCRIPTEN //! Testing parallel_pipeline cancellation //! 
\brief \ref error_guessing TEST_CASE("parallel_pipeline cancellation test #2") { @@ -1748,3 +1759,4 @@ TEST_CASE("parallel_pipeline cancellation test #2") { } } } +#endif diff --git a/third-party/tbb/test/tbb/test_eh_flow_graph.cpp b/third-party/tbb/test/tbb/test_eh_flow_graph.cpp index 71f38156..015d196e 100644 --- a/third-party/tbb/test/tbb/test_eh_flow_graph.cpp +++ b/third-party/tbb/test/tbb/test_eh_flow_graph.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -2017,6 +2017,7 @@ void TestOneThreadNum(int nThread) { ); } +#if !EMSCRIPTEN //! Test exceptions with parallelism //! \brief \ref error_guessing TEST_CASE("Testing several threads"){ @@ -2026,5 +2027,5 @@ TEST_CASE("Testing several threads"){ TestOneThreadNum(nThread); } } - +#endif #endif // TBB_USE_EXCEPTIONS diff --git a/third-party/tbb/test/tbb/test_eh_thread.cpp b/third-party/tbb/test/tbb/test_eh_thread.cpp index d5af9db6..a5ac1c8a 100644 --- a/third-party/tbb/test/tbb/test_eh_thread.cpp +++ b/third-party/tbb/test/tbb/test_eh_thread.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2022 Intel Corporation + Copyright (c) 2020-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -75,7 +75,7 @@ class Thread { mValid = false; pthread_attr_t attr; // Limit the stack size not to consume all virtual memory on 32 bit platforms. 
- std::size_t stacksize = utils::max(128*1024, PTHREAD_STACK_MIN); + std::size_t stacksize = utils::max(std::size_t(128*1024), std::size_t(PTHREAD_STACK_MIN)); if (pthread_attr_init(&attr) == 0 && pthread_attr_setstacksize(&attr, stacksize) == 0) { mValid = pthread_create(&mHandle, &attr, thread_routine, /* arg = */ nullptr) == 0; } diff --git a/third-party/tbb/test/tbb/test_flow_graph_priorities.cpp b/third-party/tbb/test/tbb/test_flow_graph_priorities.cpp index 5c798063..483daadb 100644 --- a/third-party/tbb/test/tbb/test_flow_graph_priorities.cpp +++ b/third-party/tbb/test/tbb/test_flow_graph_priorities.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2018-2021 Intel Corporation + Copyright (c) 2018-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -842,6 +842,7 @@ TEST_CASE("Priority nodes take precedence"){ } } +#if !EMSCRIPTEN //! Test thread eager reaction //! \brief \ref error_guessing TEST_CASE("Thread eager reaction"){ @@ -849,6 +850,7 @@ TEST_CASE("Thread eager reaction"){ ThreadsEagerReaction::test( static_cast(p) ); } } +#endif //! Test prioritization under concurrency limits //! \brief \ref error_guessing @@ -888,3 +890,4 @@ TEST_CASE("Exceptions") { Exceptions::test(); } #endif + diff --git a/third-party/tbb/test/tbb/test_fuzzing.cpp b/third-party/tbb/test/tbb/test_fuzzing.cpp index 6571ae0d..38cd7f8a 100644 --- a/third-party/tbb/test/tbb/test_fuzzing.cpp +++ b/third-party/tbb/test/tbb/test_fuzzing.cpp @@ -15,7 +15,7 @@ */ //! \file test_fuzzing.cpp -//! \brief Test the [fuzzing] of environment variables +//! 
\brief Test the [internal] of environment variables #include diff --git a/third-party/tbb/test/tbb/test_global_control.cpp b/third-party/tbb/test/tbb/test_global_control.cpp index 0c3df3bf..fddbbaf6 100644 --- a/third-party/tbb/test/tbb/test_global_control.cpp +++ b/third-party/tbb/test/tbb/test_global_control.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -245,11 +245,13 @@ TEST_CASE("prolong lifetime advanced") { } #endif +#if !EMSCRIPTEN //! Testing multiple wait //! \brief \ref error_guessing TEST_CASE("prolong lifetime multiple wait") { TestBlockingTerminateNS::TestMultpleWait(); } +#endif //! \brief \ref regression TEST_CASE("test concurrent task_scheduler_handle destruction") { diff --git a/third-party/tbb/test/tbb/test_mutex.cpp b/third-party/tbb/test/tbb/test_mutex.cpp index bc7b79e3..5b78f173 100644 --- a/third-party/tbb/test/tbb/test_mutex.cpp +++ b/third-party/tbb/test/tbb/test_mutex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -109,7 +109,6 @@ void TestTransaction(const char* name) REQUIRE_MESSAGE(n_transactions_attempted.load(std::memory_order_relaxed), "ERROR for " << name << ": transactions were never attempted"); } - //! \brief \ref error_guessing TEST_CASE("Transaction test") { if (have_TSX()) { @@ -119,6 +118,7 @@ TEST_CASE("Transaction test") { } #endif /* __TBB_TSX_TESTING_ENABLED_FOR_THIS_COMPILER */ + //! \brief \ref error_guessing TEST_CASE("test upgrade/downgrade with spin_rw_mutex") { test_rwm_upgrade_downgrade(); @@ -144,10 +144,12 @@ TEST_CASE("test spin_mutex with native threads") { test_with_native_threads::test(); } +#if !EMSCRIPTEN //! 
\brief \ref error_guessing TEST_CASE("test queuing_mutex with native threads") { test_with_native_threads::test(); } +#endif //! \brief \ref error_guessing TEST_CASE("test mutex with native threads") { @@ -160,11 +162,13 @@ TEST_CASE("test spin_rw_mutex with native threads") { test_with_native_threads::test_rw(); } +#if !EMSCRIPTEN //! \brief \ref error_guessing TEST_CASE("test queuing_rw_mutex with native threads") { test_with_native_threads::test(); test_with_native_threads::test_rw(); } +#endif //! \brief \ref error_guessing TEST_CASE("test rw_mutex with native threads") { @@ -197,3 +201,4 @@ TEST_CASE("internal mutex concepts") { tbb::null_rw_mutex, tbb::queuing_rw_mutex>); } #endif // __TBB_CPP20_CONCEPTS_PRESENT + diff --git a/third-party/tbb/test/tbb/test_parallel_for_each.cpp b/third-party/tbb/test/tbb/test_parallel_for_each.cpp index f6bb5090..3dfc107e 100644 --- a/third-party/tbb/test/tbb/test_parallel_for_each.cpp +++ b/third-party/tbb/test/tbb/test_parallel_for_each.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,6 +22,105 @@ //! \file test_parallel_for_each.cpp //! \brief Test for [algorithms.parallel_for_each] +#if __TBB_CPP20_PRESENT +// Fancy iterator type that models the C++20 iterator type +// that defines the real iterator category using iterator_concept type +// and iterator_category is always std::input_iterator_type +// Similar iterators are used by C++20 ranges (e.g. 
std::ranges::iota_view::iterator) +// parallel_for_each algorithm should detect such iterators with respect to iterator_concept value + +template +struct cpp20_iterator { + static_assert(std::derived_from, + "cpp20_iterator should be of at least forward iterator category"); + + using iterator_concept = Category; + using iterator_category = std::input_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + + cpp20_iterator() = default; + explicit cpp20_iterator(T* ptr) : my_ptr(ptr) {} + + T& operator*() const { return *my_ptr; } + + cpp20_iterator& operator++() { + ++my_ptr; + return *this; + } + + cpp20_iterator operator++(int) { + auto it = *this; + ++*this; + return it; + } + + cpp20_iterator& operator--() + requires std::derived_from + { + --my_ptr; + return *this; + } + + cpp20_iterator operator--(int) + requires std::derived_from + { + auto it = *this; + --*this; + return it; + } + + cpp20_iterator& operator+=(difference_type n) + requires std::derived_from + { + my_ptr += n; + return *this; + } + + cpp20_iterator& operator-=(difference_type n) + requires std::derived_from + { + my_ptr -= n; + return *this; + } + + T& operator[](difference_type n) const + requires std::derived_from + { + return my_ptr[n]; + } + + friend bool operator==(const cpp20_iterator&, const cpp20_iterator&) = default; + + friend auto operator<=>(const cpp20_iterator&, const cpp20_iterator&) + requires std::derived_from = default; + + friend cpp20_iterator operator+(cpp20_iterator i, difference_type n) + requires std::derived_from + { + return cpp20_iterator(i.my_ptr + n); + } + + friend cpp20_iterator operator+(difference_type n, cpp20_iterator i) + requires std::derived_from + { + return i + n; + } + + friend cpp20_iterator operator-(cpp20_iterator i, difference_type n) + requires std::derived_from + { + return cpp20_iterator(i.my_ptr - n); + } + + friend difference_type operator-(const cpp20_iterator& x, const cpp20_iterator& y) { + return x.my_ptr - 
y.my_ptr; + } +private: + T* my_ptr = nullptr; +}; // class cpp20_iterator +#endif // __TBB_CPP20_PRESENT + //! Test forward access iterator support //! \brief \ref error_guessing \ref interface TEST_CASE("Forward iterator support") { @@ -172,3 +271,65 @@ TEST_CASE("parallel_for_each constraints") { } #endif // __TBB_CPP20_CONCEPTS_PRESENT + +#if __TBB_CPP20_PRESENT + +struct no_copy_move { + no_copy_move() = default; + + no_copy_move(const no_copy_move&) = delete; + no_copy_move(no_copy_move&&) = delete; + + no_copy_move& operator=(const no_copy_move&) = delete; + no_copy_move& operator=(no_copy_move&&) = delete; + + int item = 0; +}; + +template +void test_with_cpp20_iterator() { + constexpr std::size_t n = 1'000'000; + + std::vector elements(n); + + cpp20_iterator begin(elements.data()); + cpp20_iterator end(elements.data() + n); + + oneapi::tbb::parallel_for_each(begin, end, [](no_copy_move& element) { + element.item = 42; + }); + + for (std::size_t index = 0; index < n; ++index) { + CHECK(elements[index].item == 42); + } +} + +//! 
\brief \ref error_guessing \ref regression +TEST_CASE("parallel_for_each with cpp20 iterator") { + // Test that parallel_for_each threats ignores iterator_category type + // if iterator_concept type is defined for iterator + + // For input iterators parallel_for_each requires element to be + // copyable or movable so since cpp20_iterator is at least forward + // parallel_for_each should work with cpp20_iterator + // on non-copyable and non-movable type + + // test cpp20_iterator implementation + using cpp20_forward_iterator = cpp20_iterator; + using cpp20_bidirectional_iterator = cpp20_iterator; + using cpp20_random_access_iterator = cpp20_iterator; + + static_assert(std::forward_iterator); + static_assert(!std::bidirectional_iterator); + + static_assert(std::bidirectional_iterator); + static_assert(!std::random_access_iterator); + + static_assert(std::random_access_iterator); + + test_with_cpp20_iterator(); + test_with_cpp20_iterator(); + test_with_cpp20_iterator(); +} + +#endif // __TBB_CPP20_PRESENT diff --git a/third-party/tbb/test/tbb/test_resumable_tasks.cpp b/third-party/tbb/test/tbb/test_resumable_tasks.cpp index a363a9ca..0cba9772 100644 --- a/third-party/tbb/test/tbb/test_resumable_tasks.cpp +++ b/third-party/tbb/test/tbb/test_resumable_tasks.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -423,6 +423,7 @@ class TestCaseGuard { thread_local bool TestCaseGuard::m_local = false; +#if !EMSCRIPTEN //! Nested test for suspend and resume //! \brief \ref error_guessing TEST_CASE("Nested test for suspend and resume") { @@ -436,6 +437,7 @@ TEST_CASE("Nested arena") { TestCaseGuard guard; TestNestedArena(); } +#endif //! Test with external threads //! \brief \ref error_guessing @@ -443,11 +445,13 @@ TEST_CASE("External threads") { TestNativeThread(); } +#if !EMSCRIPTEN //! 
Stress test with external threads //! \brief \ref stress TEST_CASE("Stress test with external threads") { TestCleanupMaster(); } +#endif //! Test with an arena observer //! \brief \ref error_guessing diff --git a/third-party/tbb/test/tbb/test_scheduler_mix.cpp b/third-party/tbb/test/tbb/test_scheduler_mix.cpp index c2c02bb7..8d8e0e37 100644 --- a/third-party/tbb/test/tbb/test_scheduler_mix.cpp +++ b/third-party/tbb/test/tbb/test_scheduler_mix.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2021-2022 Intel Corporation + Copyright (c) 2021-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -522,7 +522,7 @@ enum ACTIONS { num_actions }; -void global_actor(); +void global_actor(size_t arenaAfterStealing); template struct actor; @@ -543,8 +543,13 @@ struct actor { template <> struct actor { - static void do_it(Random& r) { + static void do_it(Random& r, size_t arenaAfterStealing) { static thread_local std::size_t arenaLevel = 0; + + // treat arenas index as priority: we own some resource already, + // so may pretend only to low-priority resource + arenaLevel = std::max(arenaLevel, arenaAfterStealing); + ArenaTable::ScopedLock lock; auto entry = arenaTable.acquire(r, lock); if (entry.first) { @@ -561,11 +566,13 @@ struct actor { tbb::this_task_arena::enqueue([&wctx] { wctx.release(); }); tbb::detail::d1::wait(wctx, ctx); } else { - global_actor(); + global_actor(0); } }; switch (r.get() % (16*num_arena_actions)) { case arena_execute: + // to prevent deadlock, potentially blocking operation + // may be called only for arenas with larger index if (entry.second > arenaLevel) { gStats.notify(Statistics::ArenaExecute); auto oldArenaLevel = arenaLevel; @@ -579,7 +586,9 @@ struct actor { utils_fallthrough; default: gStats.notify(Statistics::ArenaEnqueue); - entry.first->enqueue([] { global_actor(); }); + // after stealing by a worker, the task will run in arena + // with index 
entry.second + entry.first->enqueue([ entry ] { global_actor(entry.second); }); break; } arenaTable.release(lock); @@ -601,7 +610,7 @@ struct actor { auto doGlbAction = rnd.get() % 1000 == 42; auto body = [doGlbAction, sz](int i) { if (i == sz / 2 && doGlbAction) { - global_actor(); + global_actor(0); } }; @@ -621,7 +630,7 @@ struct actor { } }; -void global_actor() { +void global_actor(size_t arenaAfterStealing) { static thread_local std::uint64_t localNumActions{}; while (globalNumActions < maxNumActions) { @@ -629,7 +638,7 @@ void global_actor() { switch (rnd.get() % num_actions) { case arena_create: gStats.notify(Statistics::ArenaCreate); actor::do_it(rnd); break; case arena_destroy: gStats.notify(Statistics::ArenaDestroy); actor::do_it(rnd); break; - case arena_action: gStats.notify(Statistics::ArenaAcquire); actor::do_it(rnd); break; + case arena_action: gStats.notify(Statistics::ArenaAcquire); actor::do_it(rnd, arenaAfterStealing); break; case parallel_algorithm: gStats.notify(Statistics::ParallelAlgorithm); actor::do_it(rnd); break; } @@ -656,7 +665,7 @@ TEST_CASE("Stress test with mixing functionality") { utils::SpinBarrier startBarrier{numExtraThreads}; utils::NativeParallelFor(numExtraThreads, [&startBarrier](std::size_t) { startBarrier.wait(); - global_actor(); + global_actor(0); }); arenaTable.shutdown(); diff --git a/third-party/tbb/test/tbb/test_task.cpp b/third-party/tbb/test/tbb/test_task.cpp index dec24def..876e3510 100644 --- a/third-party/tbb/test/tbb/test_task.cpp +++ b/third-party/tbb/test/tbb/test_task.cpp @@ -771,7 +771,8 @@ TEST_CASE("Test with priority inversion") { auto high_priority_thread_func = [&] { // Increase external threads priority - utils::increase_thread_priority(); + utils::increased_priority_guard guard{}; + utils::suppress_unused_warning(guard); // pin external threads test_arena.execute([]{}); while (task_counter++ < critical_task_counter) { @@ -796,7 +797,8 @@ TEST_CASE("Test with priority inversion") { 
high_priority_threads.emplace_back(high_priority_thread_func); } - utils::increase_thread_priority(); + utils::increased_priority_guard guard{}; + utils::suppress_unused_warning(guard); while (task_counter++ < critical_task_counter) { submit(critical_task, test_arena, test_context, true); std::this_thread::sleep_for(std::chrono::milliseconds(1)); diff --git a/third-party/tbb/test/tbbmalloc/test_malloc_compliance.cpp b/third-party/tbb/test/tbbmalloc/test_malloc_compliance.cpp index 224e2476..1a85ed58 100644 --- a/third-party/tbb/test/tbbmalloc/test_malloc_compliance.cpp +++ b/third-party/tbb/test/tbbmalloc/test_malloc_compliance.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -30,6 +30,10 @@ #include "oneapi/tbb/detail/_config.h" +// There is no RLIMIT_AS on OpenBSD. +// Therefore, the tests for memory limit is unreasonable. +#if !__OpenBSD__ + #define __TBB_NO_IMPLICIT_LINKAGE 1 #include "tbb/scalable_allocator.h" @@ -1091,3 +1095,4 @@ TEST_CASE("MAIN TEST") { } #endif /* __TBB_WIN8UI_SUPPORT */ +#endif /* Enable test */ diff --git a/third-party/tbb/test/tbbmalloc/test_malloc_whitebox.cpp b/third-party/tbb/test/tbbmalloc/test_malloc_whitebox.cpp index 0f37e9f4..9de151e0 100644 --- a/third-party/tbb/test/tbbmalloc/test_malloc_whitebox.cpp +++ b/third-party/tbb/test/tbbmalloc/test_malloc_whitebox.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -1257,7 +1257,11 @@ void TestTHP() { scalable_allocation_mode(USE_HUGE_PAGES, 1); REQUIRE_MESSAGE(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode"); +#if defined __loongarch64 + const int HUGE_PAGE_SIZE = 32 * 1024 * 1024; +#else const int HUGE_PAGE_SIZE = 2 * 1024 * 1024; +#endif // allocCount transparent huge pages should be allocated const int allocCount = 10; diff --git a/third-party/tbb/third-party-programs.txt b/third-party/tbb/third-party-programs.txt index b555450a..c088429c 100644 --- a/third-party/tbb/third-party-programs.txt +++ b/third-party/tbb/third-party-programs.txt @@ -1,58 +1,55 @@ oneAPI Threading Building Blocks (oneTBB) Third Party Programs File -This file contains the list of third party software ("third party programs") -contained in the Intel software and their required notices and/or license terms. -This third party software, even if included with the distribution of the Intel -software, may be governed by separate license terms, including without limitation, -third party license terms, other Intel software license terms, and open source -software license terms. These separate license terms govern your use of the third -party programs as set forth in the "third-party-programs.txt" or other similarlynamed text file. +This file is the "third-party-programs.txt" file specified in the associated Intel end user license +agreement for the Intel software you are licensing. The third party programs and their corresponding required notices and/or license terms are listed below. _______________________________________________________________________________________________________ -1. Intel(R) Instrumentation and Tracing Technology (ITT) - Copyright (c) 2022 Intel Corporation. All rights reserved. +1. Instrumentation and Tracing Technology (ITT) Notify User API: + Copyright (c) 2005-2023 Intel Corporation. All rights reserved. 
- Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - 3. Neither the name of the copyright holder nor the names of its contributors may be - used to endorse or promote products derived from this software - without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _______________________________________________________________________________________________________ -2. ActiveState Thread pool with same API as (multi) processing.Pool (Python recipe): - Copyright (c) 2008,2016 david decotigny (this file) - Copyright (c) 2006-2008, R Oudkerk (multiprocessing.Pool) +2. Portable Hardware Locality (hwloc): - Portable Hardware Locality (hwloc) - Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. - Copyright (c) 2004-2005 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. - Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. + Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana University Research and + Technology Corporation. All rights reserved. + Copyright (c) 2004-2005 The University of Tennessee and The University of Tennessee Research + Foundation. All rights reserved. + Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, University of Stuttgart. + All rights reserved. 
Copyright (c) 2004-2005 The Regents of the University of California. All rights reserved. Copyright (c) 2009 CNRS Copyright (c) 2009-2016 Inria. All rights reserved. - Copyright (c) 2009-2015 Université Bordeaux + Copyright (c) 2009-2015 Université Bordeaux Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. Copyright (c) 2010 IBM @@ -60,35 +57,32 @@ ________________________________________________________________________________ Copyright (c) 2012 Aleksej Saushev, The NetBSD Foundation Copyright (c) 2012 Blue Brain Project, EPFL. All rights reserved. Copyright (c) 2013-2014 University of Wisconsin-La Crosse. All rights reserved. - Copyright (c) 2015 Research Organization for Information Science and Technology (RIST). All rights reserved. + Copyright (c) 2015 Research Organization for Information Science and Technology (RIST). + All rights reserved. Copyright (c) 2015-2016 Intel, Inc. All rights reserved. - - BSD 3-clause "New" or "Revised" License + See COPYING in top-level directory. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - 3. Neither the name of author nor the names of any contributors may be - used to endorse or promote products derived from this software - without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. + 3. The name of the author may not be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. _______________________________________________________________________________________________________ 3. gperftools: Copyright (c) 2011, Google Inc. @@ -126,268 +120,60 @@ ________________________________________________________________________________ 4. Mateusz Kwiatkowski Workaround for bug 62258 in libstdc++ - GPL 3.0 with GCC Runtime Library Exception 3.1 - - GNU GENERAL PUBLIC LICENSE - - Version 3, 29 June 2007 - - Copyright (c) 2007 Free Software Foundation, Inc. - - Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 
- - Preamble - The GNU General Public License is a free, copyleft license for software and other kinds of works. - - The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. - - When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. - - For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. - - Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. 
For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. - - Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. - - The precise terms and conditions for copying, distribution and modification follow. - - TERMS AND CONDITIONS - 0. Definitions. - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. 
The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based on the Program. - - To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. - - 1. Source Code. - The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. - - A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
- - The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. - - The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. - - The Corresponding Source for a work in source code form is that same work. - - 2. Basic Permissions. - All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. - - When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. - - 4. Conveying Verbatim Copies. 
- You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified it, and giving a relevant date. - b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". - c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. - d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
- A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. - - 6. Conveying Non-Source Forms. - You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: - - a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. - b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. - c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. 
This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. - d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. - e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. - A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. - - If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). - - The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
- - Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. - - 7. Additional Terms. - "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
- - Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or - b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or - c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or - d) Limiting the use for publicity purposes of names of licensors or authors of the material; or - e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or - f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. - All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
- - If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. - - 8. Termination. - You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). - - However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. - - Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. - - 9. Acceptance Not Required for Having Copies. - You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. - - 11. Patents. - A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. - - If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. - - A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. 
- If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. - - 14. Revised Versions of this License. - The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. 
If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. - - Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. - - 15. Disclaimer of Warranty. - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details.
- The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. - - - GCC RUNTIME LIBRARY EXCEPTION - - Version 3.1, 31 March 2009 - - Copyright (c) 2009 Free Software Foundation, Inc. - - Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. - - This GCC Runtime Library Exception ("Exception") is an additional permission under section 7 of the GNU General Public License, version 3 ("GPLv3"). It applies to a given file (the "Runtime Library") that bears a notice placed by the copyright holder of the file stating that the file is governed by GPLv3 along with this Exception. - - When you use GCC to compile a program, GCC may combine portions of certain GCC header files and runtime libraries with the compiled program. The purpose of this Exception is to allow compilation of non-GPL (including proprietary) programs to use, in this way, the header files and runtime libraries covered by this Exception. - - 0. Definitions. - A file is an "Independent Module" if it either requires the Runtime Library for execution after a Compilation Process, or makes use of an interface provided by the Runtime Library, but is not otherwise based on the Runtime Library.
- - "GCC" means a version of the GNU Compiler Collection, with or without modifications, governed by version 3 (or a specified later version) of the GNU General Public License (GPL) with the option of using any subsequent versions published by the FSF. - - "GPL-compatible Software" is software whose conditions of propagation, modification and use would permit combination with GCC in accord with the license of GCC. - - "Target Code" refers to output from any compiler for a real or virtual target processor architecture, in executable form or suitable for input to an assembler, loader, linker and/or execution phase. Notwithstanding that, Target Code does not include data in any format that is used as a compiler intermediate representation, or used for producing a compiler intermediate representation. - - The "Compilation Process" transforms code entirely represented in non-intermediate languages designed for human-written code, and/or in Java Virtual Machine byte code, into Target Code. Thus, for example, use of source code generators and preprocessors need not be considered part of the Compilation Process, since the Compilation Process can be understood as starting with the output of the generators or preprocessors. - - A Compilation Process is "Eligible" if it is done using GCC, alone or with other GPL-compatible software, or if it is done without using any work based on GCC. For example, using non-GPL-compatible Software to optimize any GCC intermediate representations would not qualify as an Eligible Compilation Process. - - 1. Grant of Additional Permission. - You have permission to propagate a work of Target Code formed by combining the Runtime Library with Independent Modules, even if such propagation would otherwise violate the terms of GPLv3, provided that all Target Code was generated by Eligible Compilation Processes. You may then convey such a combination under terms of your choice, consistent with the licensing of the Independent Modules. 
+ ******************************************************************************** + * Author: Mateusz Kwiatkowski * + * * + * I hereby renounce all copyright to this file and my rights resulting from * + * it, to the broadest extent permitted by law. It may be treated as public * + * domain. * + * * + * However, as this file interfaces with GCC internal ABI, it may be subject to * + * the terms and conditions of the GNU General Public License. Please consult * + * the GCC licensing terms and/or a lawyer for details. * + * * + * Note that libstdc++ licensing terms grant additional permissions described * + * in the GCC Runtime Library Exception, version 3.1, as published by the * + * Free Software Foundation. * + *******************************************************************************/ +_______________________________________________________________________________________________________ - 2. No Weakening of GCC Copyleft. - The availability of this Exception does not imply any general presumption that third-party software is unaffected by the copyleft requirements of the license of GCC. +5. ActiveState Thread pool with same API as (multi) processing. Pool (Python recipe) + + # + # Copyright (c) 2008,2016 david decotigny (this file) + # Copyright (c) 2006-2008, R Oudkerk (multiprocessing.Pool) + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: + # + # 1. Redistributions of source code must retain the above copyright + # notice, this list of conditions and the following disclaimer. + # 2. Redistributions in binary form must reproduce the above copyright + # notice, this list of conditions and the following disclaimer in the + # documentation and/or other materials provided with the distribution. + # 3. 
Neither the name of author nor the names of any contributors may be + # used to endorse or promote products derived from this software + # without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND + # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. _______________________________________________________________________________________________________ -5. Doctest - - Copyright (c) 2016-2021 Viktor Kirilov +6. doctest - The MIT License (MIT) + Copyright (c) 2016-2023 Viktor Kirilov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -406,6 +192,7 @@ ________________________________________________________________________________ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + _______________________________________________________________________________________________________ -*Other names and brands may be claimed as the property of others. +*Other names and brands may be claimed as the property of others. \ No newline at end of file