diff --git a/3rdparty/exported/snmalloc/CMakeLists.txt b/3rdparty/exported/snmalloc/CMakeLists.txt index 9a5bb261e642..1d9cbb6d1bcf 100644 --- a/3rdparty/exported/snmalloc/CMakeLists.txt +++ b/3rdparty/exported/snmalloc/CMakeLists.txt @@ -8,8 +8,13 @@ endif() include(CheckCXXCompilerFlag) include(CheckCXXSourceCompiles) +include(CheckIncludeFileCXX) +include(CheckIPOSupported) include(CMakeDependentOption) +# Name chosen for compatibility with CTest. +option(SNMALLOC_BUILD_TESTING "Build test programs as well as shims" ON) + option(SNMALLOC_HEADER_ONLY_LIBRARY "Use snmalloc has a header-only library" OFF) # Options that apply globally option(SNMALLOC_CI_BUILD "Disable features not sensible for CI" OFF) @@ -18,6 +23,9 @@ option(SNMALLOC_USE_CXX17 "Build as C++17 for legacy support." OFF) option(SNMALLOC_TRACING "Enable large quantities of debug output." OFF) option(SNMALLOC_NO_REALLOCARRAY "Build without reallocarray exported" ON) option(SNMALLOC_NO_REALLOCARR "Build without reallocarr exported" ON) +option(SNMALLOC_LINK_ICF "Link with Identical Code Folding" ON) +option(SNMALLOC_IPO "Link with IPO/LTO support" OFF) +option(SNMALLOC_BENCHMARK_INDIVIDUAL_MITIGATIONS "Build tests and ld_preload for individual mitigations" OFF) # Options that apply only if we're not building the header-only library cmake_dependent_option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) cmake_dependent_option(SNMALLOC_STATIC_LIBRARY "Build static libraries" ON "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) @@ -38,6 +46,7 @@ if (NOT SNMALLOC_HEADER_ONLY_LIBRARY) set_property(CACHE SNMALLOC_CLEANUP PROPERTY STRINGS THREAD_CLEANUP PTHREAD_DESTRUCTORS CXX11_DESTRUCTORS) set(SNMALLOC_STATIC_LIBRARY_PREFIX "sn_" CACHE STRING "Static library function prefix") + set(SNMALLOC_COMPILER_SUPPORT_IPO FALSE) else () unset(SNMALLOC_STATIC_LIBRARY_PREFIX CACHE) unset(SNMALLOC_CLEANUP CACHE) @@ -104,6 +113,14 @@ int main() { return res; } " SNMALLOC_PLATFORM_HAS_GETENTROPY) + +# check if linux/random.h is available +# older libcs might not have sys/random.h +# but some might provide the necessary flags via linux/random.h +# the __has_include macro isn't working properly on all platforms for that header +# this is why we check its existence here +CHECK_INCLUDE_FILE_CXX(linux/random.h SNMALLOC_HAS_LINUX_RANDOM_H) + # Provide as function so other projects can reuse # FIXME: This modifies some variables that may or may not be the ones that # provide flags and so is broken by design. It should be removed once Verona @@ -192,6 +209,13 @@ if(SNMALLOC_COMPILER_SUPPORT_MCX16) target_compile_options(snmalloc INTERFACE $<$:-mcx16>) endif() +if (NOT SNMALLOC_HEADER_ONLY_LIBRARY AND SNMALLOC_IPO) + check_ipo_supported(RESULT HAS_IPO) + if (HAS_IPO) + set(SNMALLOC_COMPILER_SUPPORT_IPO TRUE) + endif() +endif() + # Helper function that conditionally defines a macro for the build target if # the CMake variable of the same name is set. 
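The two entropy probes above (SNMALLOC_PLATFORM_HAS_GETENTROPY and the new SNMALLOC_HAS_LINUX_RANDOM_H) become compile definitions. A minimal sketch of how a consumer could branch on them — the include choice and the entropy_word helper are illustrative only, not snmalloc's actual PAL code:

    // Illustrative consumer of the CMake-detected macros; not snmalloc's PAL.
    #if defined(SNMALLOC_PLATFORM_HAS_GETENTROPY)
    #  include <unistd.h>       // getentropy() on most libcs
    #elif defined(SNMALLOC_HAS_LINUX_RANDOM_H)
    #  include <linux/random.h> // older libcs: flags live here, not sys/random.h
    #endif
    #include <cstdint>

    inline uint64_t entropy_word()
    {
    #if defined(SNMALLOC_PLATFORM_HAS_GETENTROPY)
      uint64_t w = 0;
      if (getentropy(&w, sizeof(w)) == 0)
        return w;
    #endif
      // Fallback seed; a real implementation would mix in further sources.
      return 0x9e3779b97f4a7c15ULL;
    }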
function(add_as_define FLAG) @@ -202,6 +226,7 @@ add_as_define(SNMALLOC_QEMU_WORKAROUND) add_as_define(SNMALLOC_TRACING) add_as_define(SNMALLOC_CI_BUILD) add_as_define(SNMALLOC_PLATFORM_HAS_GETENTROPY) +add_as_define(SNMALLOC_HAS_LINUX_RANDOM_H) if (SNMALLOC_NO_REALLOCARRAY) add_as_define(SNMALLOC_NO_REALLOCARRAY) endif() @@ -247,7 +272,6 @@ function(add_warning_flags name) $<$:$<${ci_or_debug}:/DEBUG>>) endfunction() - # To build with just the header library target define SNMALLOC_HEADER_ONLY_LIBRARY if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) @@ -262,9 +286,81 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) set(${result} ${dirlist} PARENT_SCOPE) endfunction() + set(TESTDIR ${CMAKE_CURRENT_SOURCE_DIR}/src/test) + + if(SNMALLOC_BUILD_TESTING) + enable_testing() + subdirlist(TEST_CATEGORIES ${TESTDIR}) + else() + set(TEST_CATEGORIES "") + endif() + list(REVERSE TEST_CATEGORIES) + + if (${SNMALLOC_CLEANUP} STREQUAL THREAD_CLEANUP) + set(TEST_CLEANUP PTHREAD_DESTRUCTORS) + else () + set(TEST_CLEANUP ${SNMALLOC_CLEANUP}) + endif() + + function(make_tests TAG DEFINES) + foreach(TEST_CATEGORY ${TEST_CATEGORIES}) + message(STATUS "Adding ${TAG}/${TEST_CATEGORY} tests") + subdirlist(TESTS ${TESTDIR}/${TEST_CATEGORY}) + foreach(TEST ${TESTS}) + unset(SRC) + aux_source_directory(${TESTDIR}/${TEST_CATEGORY}/${TEST} SRC) + set(TESTNAME "${TEST_CATEGORY}-${TEST}-${TAG}") + + add_executable(${TESTNAME} ${SRC}) + + if(SNMALLOC_SANITIZER) + target_compile_options(${TESTNAME} PRIVATE -g -fsanitize=${SNMALLOC_SANITIZER} -fno-omit-frame-pointer) + target_link_libraries(${TESTNAME} -fsanitize=${SNMALLOC_SANITIZER}) + endif() + + add_warning_flags(${TESTNAME}) + + target_link_libraries(${TESTNAME} snmalloc) + target_compile_definitions(${TESTNAME} PRIVATE "SNMALLOC_USE_${TEST_CLEANUP}") + + if (NOT DEFINES STREQUAL " ") + target_compile_definitions(${TESTNAME} PRIVATE ${DEFINES}) + endif() + + if (${TEST} MATCHES "release-.*") + message(VERBOSE "Adding test: ${TESTNAME} only for release configs") + add_test(NAME ${TESTNAME} COMMAND ${TESTNAME} CONFIGURATIONS "Release") + else() + message(VERBOSE "Adding test: ${TESTNAME}") + add_test(${TESTNAME} ${TESTNAME}) + endif() + if (${TEST_CATEGORY} MATCHES "perf") + message(VERBOSE "Single threaded test: ${TESTNAME}") + set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) + endif() + if(WIN32) + # On Windows these tests use a lot of memory as it doesn't support + # lazy commit. 
+ if (${TEST} MATCHES "two_alloc_types") + message(VERBOSE "Single threaded test: ${TESTNAME}") + set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) + endif() + if (${TEST} MATCHES "fixed_region") + message(VERBOSE "Single threaded test: ${TESTNAME}") + set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) + endif() + if (${TEST} MATCHES "memory") + message(VERBOSE "Single threaded test: ${TESTNAME}") + set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) + endif() + endif() + endforeach() + endforeach() + endfunction() + if(NOT (DEFINED SNMALLOC_LINKER_FLAVOUR) OR ("${SNMALLOC_LINKER_FLAVOUR}" MATCHES "^$")) # Linker not specified externally; probe to see if we can make lld work - set(CMAKE_REQUIRED_LINK_OPTIONS -fuse-ld=lld) + set(CMAKE_REQUIRED_LINK_OPTIONS -fuse-ld=lld -Wl,--icf=all) check_cxx_source_compiles("int main() { return 1; }" LLD_WORKS) if (LLD_WORKS) message(STATUS "Using LLD to link snmalloc shims") @@ -282,7 +378,7 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) function(add_shim name type) add_library(${name} ${type} ${ARGN}) target_link_libraries(${name} snmalloc) - set_target_properties(${name} PROPERTIES CXX_VISIBILITY_PRESET hidden) + set_target_properties(${name} PROPERTIES CXX_VISIBILITY_PRESET hidden INTERPROCEDURAL_OPTIMIZATION ${SNMALLOC_COMPILER_SUPPORT_IPO}) target_compile_definitions(${name} PRIVATE "SNMALLOC_USE_${SNMALLOC_CLEANUP}") add_warning_flags(${name}) @@ -299,7 +395,7 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) if(SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE) check_cxx_compiler_flag(-march=native SUPPORT_MARCH_NATIVE) if (SUPPORT_MARCH_NATIVE) - target_compile_options(${name} -march=native) + target_compile_options(${name} PRIVATE -march=native) else() message(WARNING "Compiler does not support `-march=native` required by SNMALLOC_OPTIMISE_FOR_CURRENT_MACHINE") endif() @@ -318,7 +414,7 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) endif() endif() # Remove all the duplicate new/malloc and free/delete definitions - target_link_options(${name} PRIVATE $<$:-Wl,--icf=all -fuse-ld=lld>) + target_link_options(${name} PRIVATE $<$:$<$:-Wl,--icf=all> -fuse-ld=lld>) endif() target_compile_definitions(${name} PRIVATE @@ -352,86 +448,78 @@ if(NOT SNMALLOC_HEADER_ONLY_LIBRARY) target_compile_definitions(snmallocshim-checks-rust PRIVATE SNMALLOC_CHECK_CLIENT) endif() - enable_testing() + if (SNMALLOC_BUILD_TESTING) + if (WIN32 + OR (CMAKE_SYSTEM_NAME STREQUAL NetBSD) + OR (CMAKE_SYSTEM_NAME STREQUAL OpenBSD) + OR (CMAKE_SYSTEM_NAME STREQUAL SunOS)) + # Windows does not support aligned allocation well enough + # for pass through. + # NetBSD, OpenBSD and DragonFlyBSD do not support malloc*size calls. + set(FLAVOURS fast;check) + else() + set(FLAVOURS fast;check;malloc) + endif() - set(TESTDIR ${CMAKE_CURRENT_SOURCE_DIR}/src/test) - subdirlist(TEST_CATEGORIES ${TESTDIR}) - list(REVERSE TEST_CATEGORIES) - if (${SNMALLOC_CLEANUP} STREQUAL THREAD_CLEANUP) - set(TEST_CLEANUP PTHREAD_DESTRUCTORS) - else () - set(TEST_CLEANUP ${SNMALLOC_CLEANUP}) - endif() - foreach(TEST_CATEGORY ${TEST_CATEGORIES}) - message(STATUS "Adding ${TEST_CATEGORY} tests") - subdirlist(TESTS ${TESTDIR}/${TEST_CATEGORY}) - foreach(TEST ${TESTS}) - if (WIN32 - OR (CMAKE_SYSTEM_NAME STREQUAL NetBSD) - OR (CMAKE_SYSTEM_NAME STREQUAL OpenBSD) - OR (CMAKE_SYSTEM_NAME STREQUAL DragonFly) - OR (CMAKE_SYSTEM_NAME STREQUAL SunOS)) - # Windows does not support aligned allocation well enough - # for pass through. - # NetBSD, OpenBSD and DragonFlyBSD do not support malloc*size calls. 
- set(FLAVOURS fast;check) - else() - set(FLAVOURS fast;check;malloc) + foreach(FLAVOUR ${FLAVOURS}) + if (${FLAVOUR} STREQUAL "malloc") + set(DEFINES SNMALLOC_PASS_THROUGH) + endif() + if (${FLAVOUR} STREQUAL "check") + set(DEFINES SNMALLOC_CHECK_CLIENT) + endif() + if (${FLAVOUR} STREQUAL "fast") + set(DEFINES " ") endif() - foreach(FLAVOUR ${FLAVOURS}) - unset(SRC) - aux_source_directory(${TESTDIR}/${TEST_CATEGORY}/${TEST} SRC) - set(TESTNAME "${TEST_CATEGORY}-${TEST}-${FLAVOUR}") - - add_executable(${TESTNAME} ${SRC}) - if(SNMALLOC_SANITIZER) - target_compile_options(${TESTNAME} PRIVATE -g -fsanitize=${SNMALLOC_SANITIZER} -fno-omit-frame-pointer) - target_link_libraries(${TESTNAME} -fsanitize=${SNMALLOC_SANITIZER}) - endif() + make_tests(${FLAVOUR} ${DEFINES}) + endforeach() + endif() - add_warning_flags(${TESTNAME}) + if (SNMALLOC_BENCHMARK_INDIVIDUAL_MITIGATIONS) + set (MITIGATIONS + metadata_protection; + pal_enforce_access; + random_pagemap; + sanity_checks; + freelist_forward_edge; + freelist_backward_edge; + freelist_teardown_validate; + reuse_LIFO; + random_larger_thresholds; + random_initial; + random_preserve; + random_extra_slab) + + + foreach (MITIGATION ${MITIGATIONS}) + set(DEFINES "SNMALLOC_CHECK_CLIENT_MITIGATIONS=${MITIGATION}") + add_shim(snmallocshim-${MITIGATION} SHARED ${SHIM_FILES}) + target_compile_definitions(snmallocshim-${MITIGATION} PRIVATE ${DEFINES}) + if (SNMALLOC_BUILD_TESTING) + make_tests(${MITIGATION} ${DEFINES}) + endif() + endforeach() - if (${FLAVOUR} STREQUAL "malloc") - target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_PASS_THROUGH) - endif() - if (${FLAVOUR} STREQUAL "check") - target_compile_definitions(${TESTNAME} PRIVATE SNMALLOC_CHECK_CLIENT) - endif() - target_link_libraries(${TESTNAME} snmalloc) - target_compile_definitions(${TESTNAME} PRIVATE "SNMALLOC_USE_${TEST_CLEANUP}") - if (${TEST} MATCHES "release-.*") - message(VERBOSE "Adding test: ${TESTNAME} only for release configs") - add_test(NAME ${TESTNAME} COMMAND ${TESTNAME} CONFIGURATIONS "Release") - else() - message(VERBOSE "Adding test: ${TESTNAME}") - add_test(${TESTNAME} ${TESTNAME}) - endif() - if (${TEST_CATEGORY} MATCHES "perf") - message(VERBOSE "Single threaded test: ${TESTNAME}") - set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) - endif() - if(WIN32) - # On Windows these tests use a lot of memory as it doesn't support - # lazy commit. 
- if (${TEST} MATCHES "two_alloc_types") - message(VERBOSE "Single threaded test: ${TESTNAME}") - set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) - endif() - if (${TEST} MATCHES "fixed_region") - message(VERBOSE "Single threaded test: ${TESTNAME}") - set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) - endif() - if (${TEST} MATCHES "memory") - message(VERBOSE "Single threaded test: ${TESTNAME}") - set_tests_properties(${TESTNAME} PROPERTIES PROCESSORS 4) - endif() - endif() - endforeach() + set(MITIGATIONSET "no_checks") + set(COUNT 0) + foreach (MITIGATION ${MITIGATIONS}) + MATH(EXPR COUNT "${COUNT} + 1") + set(MITIGATIONNAME "mitigations-${COUNT}") + set(MITIGATIONSET "${MITIGATIONSET}+${MITIGATION}") + message(STATUS "MITIGATIONSET: ${COUNT} -> ${MITIGATIONSET}") + set(DEFINES "-DSNMALLOC_CHECK_CLIENT_MITIGATIONS=${MITIGATIONSET}") + add_shim(snmallocshim-${MITIGATIONNAME} SHARED ${SHIM_FILES}) + target_compile_definitions(snmallocshim-${MITIGATIONNAME} PRIVATE ${DEFINES}) + if (SNMALLOC_BUILD_TESTING) + make_tests(${MITIGATIONNAME} ${DEFINES}) + endif() endforeach() - endforeach() + endif() - clangformat_targets() + if (SNMALLOC_BUILD_TESTING) + clangformat_targets() + endif () endif() install(TARGETS snmalloc EXPORT snmallocConfig) diff --git a/3rdparty/exported/snmalloc/README.md b/3rdparty/exported/snmalloc/README.md index c57b2a9af842..8dcd9d119955 100644 --- a/3rdparty/exported/snmalloc/README.md +++ b/3rdparty/exported/snmalloc/README.md @@ -32,7 +32,7 @@ do not for snmalloc. The implementation of snmalloc has evolved significantly since the [initial paper](snmalloc.pdf). The mechanism for returning memory to remote threads has remained, but most of the meta-data layout has changed. We recommend you read [docs/security](./docs/security/README.md) to find out about the current design, and -if you want to dive into the code (./docs/AddressSpace.md) provides a good overview of the allocation and deallocation paths. +if you want to dive into the code [docs/AddressSpace.md](./docs/AddressSpace.md) provides a good overview of the allocation and deallocation paths. 
[![snmalloc CI](https://github.com/microsoft/snmalloc/actions/workflows/main.yml/badge.svg?branch=master)](https://github.com/microsoft/snmalloc/actions/workflows/main.yml) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h index 413c284b0153..49b92da682cb 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal.h @@ -174,7 +174,7 @@ namespace snmalloc } else { -#if __has_builtin(__builtin_readcyclecounter) && \ +#if __has_builtin(__builtin_readcyclecounter) && !defined(__APPLE__) && \ !defined(SNMALLOC_NO_AAL_BUILTINS) return __builtin_readcyclecounter(); #else @@ -198,8 +198,8 @@ namespace snmalloc */ template< typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) BOut, - SNMALLOC_CONCEPT(capptr::ConceptBound) BIn, + SNMALLOC_CONCEPT(capptr::IsBound) BOut, + SNMALLOC_CONCEPT(capptr::IsBound) BIn, typename U = T> static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept @@ -212,7 +212,26 @@ namespace snmalloc "capptr_bound must preserve non-spatial CapPtr dimensions"); UNUSED(size); - return CapPtr(a.template as_static().unsafe_ptr()); + return CapPtr::unsafe_from( + a.template as_static().unsafe_ptr()); + } + + template< + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) BOut, + SNMALLOC_CONCEPT(capptr::IsBound) BIn, + typename U = T> + static SNMALLOC_FAST_PATH CapPtr + capptr_rebound(CapPtr a, CapPtr b) noexcept + { + UNUSED(a); + return CapPtr::unsafe_from( + b.template as_static().unsafe_ptr()); + } + + static SNMALLOC_FAST_PATH size_t capptr_size_round(size_t sz) noexcept + { + return sz; } }; } // namespace snmalloc @@ -243,8 +262,19 @@ namespace snmalloc using Aal = AAL_Generic>; #endif - template - constexpr static bool aal_supports = (AAL::aal_features & F) == F; + template + constexpr bool aal_supports = (AAL::aal_features & F) == F; + + /* + * The backend's leading-order response to StrictProvenance is entirely + * within its data structures and not actually anything to do with the + * architecture. Rather than test aal_supports or + * defined(__CHERI_PURE_CAPABILITY__) or such therein, using this + * backend_strict_provenance flag makes it easy to test a lot of machinery + * on non-StrictProvenance architectures. 
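The switch above from a static member to the aal_supports variable template is the hook used later in this diff (for example aal_supports<StrictProvenance> in globalconfig.h). A self-contained analogue of the pattern, with illustrative feature values and a stand-in AAL:

    #include <cstdint>

    // Stand-in feature bits and AAL; names mirror the pattern, values are illustrative.
    constexpr uint64_t IntegerPointers = 1 << 0;
    constexpr uint64_t StrictProvenance = 1 << 1;

    struct DemoAAL
    {
      static constexpr uint64_t aal_features = IntegerPointers;
    };

    template<uint64_t F, typename AAL = DemoAAL>
    constexpr bool aal_supports = (AAL::aal_features & F) == F;

    static_assert(aal_supports<IntegerPointers>);
    static_assert(!aal_supports<StrictProvenance>);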
+ */ + static constexpr bool backend_strict_provenance = + aal_supports; } // namespace snmalloc #ifdef __POINTER_WIDTH__ diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h index 39bcd95a4e8c..b6bae779e4de 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_arm.h @@ -24,8 +24,11 @@ namespace snmalloc /** * Bitmap of AalFeature flags */ - static constexpr uint64_t aal_features = - IntegerPointers | NoCpuCycleCounters; + static constexpr uint64_t aal_features = IntegerPointers +#if defined(SNMALLOC_VA_BITS_32) || !defined(__APPLE__) + | NoCpuCycleCounters +#endif + ; static constexpr enum AalName aal_name = ARM; @@ -56,6 +59,15 @@ namespace snmalloc __asm__ volatile("pld\t[%0]" : "=r"(ptr)); #endif } + +#if defined(SNMALLOC_VA_BITS_64) && defined(__APPLE__) + static inline uint64_t tick() noexcept + { + uint64_t t; + __asm__ volatile("mrs %0, cntvct_el0" : "=r"(t)); + return t; + } +#endif }; using AAL_Arch = AAL_arm; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h index 4774dde6799d..4a4acd379a04 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_cheri.h @@ -63,8 +63,8 @@ namespace snmalloc template< typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) BOut, - SNMALLOC_CONCEPT(capptr::ConceptBound) BIn, + SNMALLOC_CONCEPT(capptr::IsBound) BOut, + SNMALLOC_CONCEPT(capptr::IsBound) BIn, typename U = T> static SNMALLOC_FAST_PATH CapPtr capptr_bound(CapPtr a, size_t size) noexcept @@ -86,7 +86,47 @@ namespace snmalloc } void* pb = __builtin_cheri_bounds_set_exact(a.unsafe_ptr(), size); - return CapPtr(static_cast(pb)); + + SNMALLOC_ASSERT( + __builtin_cheri_tag_get(pb) && "capptr_bound exactness failed."); + + return CapPtr::unsafe_from(static_cast(pb)); + } + + template< + typename T, + SNMALLOC_CONCEPT(capptr::IsBound) BOut, + SNMALLOC_CONCEPT(capptr::IsBound) BIn, + typename U = T> + static SNMALLOC_FAST_PATH CapPtr + capptr_rebound(CapPtr a, CapPtr b) noexcept + { + return CapPtr::unsafe_from(static_cast( + __builtin_cheri_address_set(a.unsafe_ptr(), address_cast(b)))); + } + + static SNMALLOC_FAST_PATH size_t capptr_size_round(size_t sz) noexcept + { + /* + * Round up sz to the next representable value for the target + * architecture's choice of CHERI Concentrate T/B mantissa width. + * + * On Morello specifically, this intrinsic will (soon, as of this text + * being written) expand to a multi-instruction sequence to work around a + * bug in which sz values satisfying $\exists_E sz = ((1 << 12) - 1) << + * (E + 3)$ are incorrectly increased. If for some reason this ends up + * being at all hot, there will also be a + * __builtin_morello_round_representable_length_inexact, which will just + * return too big of a size for those values (by rounding up to $1 << (E + * + 15)$). While technically incorrect, this behavior is probably fine + * for snmalloc: we already slot metadata allocations into NAPOT holes + * and then return any unused space at the top, so the over-reach would, + * at the worst, just prevent said return, and our sizeclasses never run + * into this issue. That is, we're clear to use the __builtin_morello_* + * intrinsic if the multi-instruction sequence proves slow. 
See + * https://git.morello-project.org/morello/llvm-project/-/merge_requests/199 + */ + return __builtin_cheri_round_representable_length(sz); } }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h index 6269d9629a76..3ce64a79ccc8 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/aal_concept.h @@ -14,7 +14,7 @@ namespace snmalloc * machine word size, and an upper bound on the address space size */ template - concept ConceptAAL_static_members = requires() + concept IsAAL_static_members = requires() { typename std::integral_constant; typename std::integral_constant; @@ -26,7 +26,7 @@ namespace snmalloc * AALs provide a prefetch operation. */ template - concept ConceptAAL_prefetch = requires(void* ptr) + concept IsAAL_prefetch = requires(void* ptr) { { AAL::prefetch(ptr) @@ -38,7 +38,7 @@ namespace snmalloc * AALs provide a notion of high-precision timing. */ template - concept ConceptAAL_tick = requires() + concept IsAAL_tick = requires() { { AAL::tick() @@ -47,7 +47,7 @@ namespace snmalloc }; template - concept ConceptAAL_capptr_methods = + concept IsAAL_capptr_methods = requires(capptr::Chunk auth, capptr::AllocFull ret, size_t sz) { /** @@ -59,12 +59,42 @@ namespace snmalloc AAL::template capptr_bound(auth, sz) } noexcept->ConceptSame>; + + /** + * "Amplify" by copying the address of one pointer into one of higher + * privilege. The resulting pointer differs from auth only in address. + */ + { + AAL::capptr_rebound(auth, ret) + } + noexcept->ConceptSame>; + + /** + * Round up an allocation size to a size this architecture can represent. + * While there may also, in general, be alignment requirements for + * representability, in snmalloc so far we have not had reason to consider + * these explicitly: when we use our... + * + * - sizeclass machinery (for user-facing data), we assume that all + * sizeclasses describe architecturally representable aligned-and-sized + * regions + * + * - Range machinery (for internal meta-data), we always choose NAPOT + * regions big enough for the requested size (returning space above the + * allocation within such regions for use as smaller NAPOT regions). + * + * That is, capptr_size_round is not needed on the user-facing fast paths, + * merely internally for bootstrap and metadata management. + */ + { + AAL::capptr_size_round(sz) + } + noexcept->ConceptSame; }; template - concept ConceptAAL = - ConceptAAL_static_members&& ConceptAAL_prefetch&& - ConceptAAL_tick&& ConceptAAL_capptr_methods; + concept IsAAL = IsAAL_static_members&& IsAAL_prefetch&& + IsAAL_tick&& IsAAL_capptr_methods; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/aal/address.h b/3rdparty/exported/snmalloc/src/snmalloc/aal/address.h index 1f528f980a0f..6c1a760bf84d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/aal/address.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/aal/address.h @@ -14,7 +14,8 @@ namespace snmalloc /** * Perform arithmetic on a uintptr_t. */ - inline uintptr_t pointer_offset(uintptr_t base, size_t diff) + SNMALLOC_FAST_PATH_INLINE uintptr_t + pointer_offset(uintptr_t base, size_t diff) { return base + diff; } @@ -23,42 +24,44 @@ namespace snmalloc * Perform pointer arithmetic and return the adjusted pointer. 
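A minimal sketch of the capptr_size_round contract introduced in aal_cheri.h above: the generic AAL returns the size unchanged, the CHERI AAL rounds up to a representable length, and callers (such as alloc_meta_data later in this diff) round before bounding. DemoGenericAAL and metadata_request_size are names local to this sketch:

    #include <cstddef>

    struct DemoGenericAAL
    {
      // Identity on conventional architectures; the CHERI AAL instead returns
      // __builtin_cheri_round_representable_length(sz).
      static size_t capptr_size_round(size_t sz) noexcept { return sz; }
    };

    template<typename AAL>
    size_t metadata_request_size(size_t size)
    {
      // Round first, so a later exact bound on the allocation cannot fail.
      return AAL::capptr_size_round(size);
    }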
*/ template - inline U* pointer_offset(T* base, size_t diff) + SNMALLOC_FAST_PATH_INLINE U* pointer_offset(T* base, size_t diff) { SNMALLOC_ASSERT(base != nullptr); /* Avoid UB */ return unsafe_from_uintptr( unsafe_to_uintptr(base) + static_cast(diff)); } - template - inline CapPtr + template + SNMALLOC_FAST_PATH_INLINE CapPtr pointer_offset(CapPtr base, size_t diff) { - return CapPtr(pointer_offset(base.unsafe_ptr(), diff)); + return CapPtr::unsafe_from( + pointer_offset(base.unsafe_ptr(), diff)); } /** * Perform pointer arithmetic and return the adjusted pointer. */ template - inline U* pointer_offset_signed(T* base, ptrdiff_t diff) + SNMALLOC_FAST_PATH_INLINE U* pointer_offset_signed(T* base, ptrdiff_t diff) { SNMALLOC_ASSERT(base != nullptr); /* Avoid UB */ return reinterpret_cast(reinterpret_cast(base) + diff); } - template - inline CapPtr + template + SNMALLOC_FAST_PATH_INLINE CapPtr pointer_offset_signed(CapPtr base, ptrdiff_t diff) { - return CapPtr(pointer_offset_signed(base.unsafe_ptr(), diff)); + return CapPtr::unsafe_from( + pointer_offset_signed(base.unsafe_ptr(), diff)); } /** * Cast from a pointer type to an address. */ template - inline SNMALLOC_FAST_PATH address_t address_cast(T* ptr) + SNMALLOC_FAST_PATH_INLINE address_t address_cast(T* ptr) { return reinterpret_cast(ptr); } @@ -70,13 +73,13 @@ namespace snmalloc * as per above, and uses the wrapper types in its own definition, e.g., of * capptr_bound. */ - template - inline SNMALLOC_FAST_PATH address_t address_cast(CapPtr a) + template + SNMALLOC_FAST_PATH_INLINE address_t address_cast(CapPtr a) { return address_cast(a.unsafe_ptr()); } - inline SNMALLOC_FAST_PATH address_t address_cast(uintptr_t a) + SNMALLOC_FAST_PATH_INLINE address_t address_cast(uintptr_t a) { return static_cast(a); } @@ -86,7 +89,7 @@ namespace snmalloc * two. */ template - static inline bool is_aligned_block(address_t p, size_t size) + SNMALLOC_FAST_PATH_INLINE bool is_aligned_block(address_t p, size_t size) { static_assert(bits::is_pow2(alignment)); @@ -94,7 +97,7 @@ namespace snmalloc } template - static inline bool is_aligned_block(void* p, size_t size) + SNMALLOC_FAST_PATH_INLINE bool is_aligned_block(void* p, size_t size) { return is_aligned_block(address_cast(p), size); } @@ -104,7 +107,7 @@ namespace snmalloc * a power of two. */ template - inline uintptr_t pointer_align_down(uintptr_t p) + SNMALLOC_FAST_PATH_INLINE uintptr_t pointer_align_down(uintptr_t p) { static_assert(alignment > 0); static_assert(bits::is_pow2(alignment)); @@ -125,7 +128,7 @@ namespace snmalloc * power of two. */ template - inline T* pointer_align_down(void* p) + SNMALLOC_FAST_PATH_INLINE T* pointer_align_down(void* p) { return unsafe_from_uintptr( pointer_align_down(unsafe_to_uintptr(p))); @@ -134,14 +137,16 @@ namespace snmalloc template< size_t alignment, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) bounds> - inline CapPtr pointer_align_down(CapPtr p) + SNMALLOC_CONCEPT(capptr::IsBound) bounds> + SNMALLOC_FAST_PATH_INLINE CapPtr + pointer_align_down(CapPtr p) { - return CapPtr(pointer_align_down(p.unsafe_ptr())); + return CapPtr::unsafe_from( + pointer_align_down(p.unsafe_ptr())); } template - inline address_t address_align_down(address_t p) + SNMALLOC_FAST_PATH_INLINE address_t address_align_down(address_t p) { return bits::align_down(p, alignment); } @@ -151,7 +156,7 @@ namespace snmalloc * power of two. 
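The align-down/align-up helpers being annotated here all reduce to the same power-of-two mask arithmetic; a self-contained illustration (assuming a power-of-two alignment, as the static_asserts above require; names are local to this sketch):

    #include <cassert>
    #include <cstdint>

    inline uintptr_t demo_align_down(uintptr_t p, uintptr_t alignment)
    {
      assert((alignment & (alignment - 1)) == 0); // power of two
      return p & ~(alignment - 1);
    }

    inline uintptr_t demo_align_up(uintptr_t p, uintptr_t alignment)
    {
      return demo_align_down(p + alignment - 1, alignment);
    }

    // e.g. demo_align_down(0x1234, 0x1000) == 0x1000,
    //      demo_align_up  (0x1234, 0x1000) == 0x2000.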
*/ template - inline T* pointer_align_up(void* p) + SNMALLOC_FAST_PATH_INLINE T* pointer_align_up(void* p) { static_assert(alignment > 0); static_assert(bits::is_pow2(alignment)); @@ -171,14 +176,16 @@ namespace snmalloc template< size_t alignment, typename T = void, - SNMALLOC_CONCEPT(capptr::ConceptBound) bounds> - inline CapPtr pointer_align_up(CapPtr p) + SNMALLOC_CONCEPT(capptr::IsBound) bounds> + SNMALLOC_FAST_PATH_INLINE CapPtr + pointer_align_up(CapPtr p) { - return CapPtr(pointer_align_up(p.unsafe_ptr())); + return CapPtr::unsafe_from( + pointer_align_up(p.unsafe_ptr())); } template - inline address_t address_align_up(address_t p) + SNMALLOC_FAST_PATH_INLINE address_t address_align_up(address_t p) { return bits::align_up(p, alignment); } @@ -188,7 +195,7 @@ namespace snmalloc * a power of two. */ template - inline T* pointer_align_down(void* p, size_t alignment) + SNMALLOC_FAST_PATH_INLINE T* pointer_align_down(void* p, size_t alignment) { SNMALLOC_ASSERT(alignment > 0); SNMALLOC_ASSERT(bits::is_pow2(alignment)); @@ -200,11 +207,12 @@ namespace snmalloc #endif } - template - inline CapPtr + template + SNMALLOC_FAST_PATH_INLINE CapPtr pointer_align_down(CapPtr p, size_t alignment) { - return CapPtr(pointer_align_down(p.unsafe_ptr(), alignment)); + return CapPtr::unsafe_from( + pointer_align_down(p.unsafe_ptr(), alignment)); } /** @@ -212,7 +220,7 @@ namespace snmalloc * be a power of two. */ template - inline T* pointer_align_up(void* p, size_t alignment) + SNMALLOC_FAST_PATH_INLINE T* pointer_align_up(void* p, size_t alignment) { SNMALLOC_ASSERT(alignment > 0); SNMALLOC_ASSERT(bits::is_pow2(alignment)); @@ -224,11 +232,12 @@ namespace snmalloc #endif } - template - inline CapPtr + template + SNMALLOC_FAST_PATH_INLINE CapPtr pointer_align_up(CapPtr p, size_t alignment) { - return CapPtr(pointer_align_up(p.unsafe_ptr(), alignment)); + return CapPtr::unsafe_from( + pointer_align_up(p.unsafe_ptr(), alignment)); } /** @@ -236,7 +245,8 @@ namespace snmalloc * expected to point to the base of some (sub)allocation into which cursor * points; would-be negative answers trip an assertion in debug builds. */ - inline size_t pointer_diff(const void* base, const void* cursor) + SNMALLOC_FAST_PATH_INLINE size_t + pointer_diff(const void* base, const void* cursor) { SNMALLOC_ASSERT(cursor >= base); return static_cast( @@ -246,9 +256,10 @@ namespace snmalloc template< typename T = void, typename U = void, - SNMALLOC_CONCEPT(capptr::ConceptBound) Tbounds, - SNMALLOC_CONCEPT(capptr::ConceptBound) Ubounds> - inline size_t pointer_diff(CapPtr base, CapPtr cursor) + SNMALLOC_CONCEPT(capptr::IsBound) Tbounds, + SNMALLOC_CONCEPT(capptr::IsBound) Ubounds> + SNMALLOC_FAST_PATH_INLINE size_t + pointer_diff(CapPtr base, CapPtr cursor) { return pointer_diff(base.unsafe_ptr(), cursor.unsafe_ptr()); } @@ -257,7 +268,8 @@ namespace snmalloc * Compute the difference in pointers in units of char. This can be used * across allocations. 
*/ - inline ptrdiff_t pointer_diff_signed(void* base, void* cursor) + SNMALLOC_FAST_PATH_INLINE ptrdiff_t + pointer_diff_signed(void* base, void* cursor) { return static_cast( static_cast(cursor) - static_cast(base)); @@ -266,12 +278,32 @@ namespace snmalloc template< typename T = void, typename U = void, - SNMALLOC_CONCEPT(capptr::ConceptBound) Tbounds, - SNMALLOC_CONCEPT(capptr::ConceptBound) Ubounds> - inline ptrdiff_t + SNMALLOC_CONCEPT(capptr::IsBound) Tbounds, + SNMALLOC_CONCEPT(capptr::IsBound) Ubounds> + SNMALLOC_FAST_PATH_INLINE ptrdiff_t pointer_diff_signed(CapPtr base, CapPtr cursor) { return pointer_diff_signed(base.unsafe_ptr(), cursor.unsafe_ptr()); } + /** + * Compute the degree to which an address is misaligned relative to some + * putative alignment. + */ + template + SNMALLOC_FAST_PATH_INLINE size_t address_misalignment(address_t a) + { + return static_cast(a - pointer_align_down(a)); + } + + /** + * Convert an address_t to a pointer. The returned pointer should never be + * followed. On CHERI following this pointer will result in a capability + * violation. + */ + template + SNMALLOC_FAST_PATH_INLINE T* useless_ptr_from_addr(address_t p) + { + return reinterpret_cast(static_cast(p)); + } } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h index dda05abd89e7..d220a080a558 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/backend.h @@ -1,216 +1,32 @@ #pragma once #include "../backend_helpers/backend_helpers.h" -#if defined(SNMALLOC_CHECK_CLIENT) && !defined(OPEN_ENCLAVE) -/** - * Protect meta data blocks by allocating separate from chunks for - * user allocations. This involves leaving gaps in address space. - * This is less efficient, so should only be applied for the checked - * build. - * - * On Open Enclave the address space is limited, so we disable this - * feature. - */ -# define SNMALLOC_META_PROTECTED -#endif - namespace snmalloc { /** * This class implements the standard backend for handling allocations. - * It abstracts page table management and address space management. + * It is parameterised by its Pagemap management and + * address space management (LocalState). */ - template - class BackendAllocator : public CommonConfig + template< + SNMALLOC_CONCEPT(IsPAL) PAL, + typename PagemapEntry, + typename Pagemap, + typename Authmap, + typename LocalState> + class BackendAllocator { - public: - class PageMapEntry; - using Pal = PAL; - using SlabMetadata = FrontendSlabMetadata; - - private: - using ConcretePagemap = - FlatPagemap; + using GlobalMetaRange = typename LocalState::GlobalMetaRange; + using Stats = typename LocalState::Stats; public: - /** - * Example of type stored in the pagemap. - * The following class could be replaced by: - * - * ``` - * using PageMapEntry = FrontendMetaEntry; - * ``` - * - * The full form here provides an example of how to extend the pagemap - * entries. It also guarantees that the front end never directly - * constructs meta entries, it only ever reads them or modifies them in - * place. - */ - class PageMapEntry : public FrontendMetaEntry - { - /** - * The private initialising constructor is usable only by this back end. - */ - friend class BackendAllocator; - - /** - * The private default constructor is usable only by the pagemap. - */ - friend ConcretePagemap; - - /** - * The only constructor that creates newly initialised meta entries. 
- * This is callable only by the back end. The front end may copy, - * query, and update these entries, but it may not create them - * directly. This contract allows the back end to store any arbitrary - * metadata in meta entries when they are first constructed. - */ - SNMALLOC_FAST_PATH - PageMapEntry(SlabMetadata* meta, uintptr_t ras) - : FrontendMetaEntry(meta, ras) - {} - - /** - * Copy assignment is used only by the pagemap. - */ - PageMapEntry& operator=(const PageMapEntry& other) - { - FrontendMetaEntry::operator=(other); - return *this; - } - - /** - * Default constructor. This must be callable from the pagemap. - */ - SNMALLOC_FAST_PATH PageMapEntry() = default; - }; - using Pagemap = BasicPagemap< - BackendAllocator, - PAL, - ConcretePagemap, - PageMapEntry, - fixed_range>; - -#if defined(_WIN32) || defined(__CHERI_PURE_CAPABILITY__) - static constexpr bool CONSOLIDATE_PAL_ALLOCS = false; -#else - static constexpr bool CONSOLIDATE_PAL_ALLOCS = true; -#endif - - // Set up source of memory - using P = PalRange; - using Base = std::conditional_t< - fixed_range, - EmptyRange, - PagemapRegisterRange>; - - static constexpr size_t MinBaseSizeBits() - { - if constexpr (pal_supports) - { - return bits::next_pow2_bits_const(PAL::minimum_alloc_size); - } - else - { - return MIN_CHUNK_BITS; - } - } - - // Global range of memory - using GlobalR = GlobalRange>>; - -#ifdef SNMALLOC_META_PROTECTED - // Introduce two global ranges, so we don't mix Object and Meta - using CentralObjectRange = GlobalRange>>; - using CentralMetaRange = GlobalRange, // Use SubRange to introduce guard pages. - 24, - bits::BITS - 1, - Pagemap, - MinBaseSizeBits()>>>; - - // Source for object allocations - using StatsObject = StatsRange>; - using ObjectRange = - LogRange<5, LargeBuddyRange>; - - using StatsMeta = StatsRange>; - - using MetaRange = SmallBuddyRange< - LargeBuddyRange>; - // Create global range that can service small meta-data requests. - // Don't want to add this to the CentralMetaRange to move Commit outside the - // lock on the common case. - using GlobalMetaRange = GlobalRange>; - using Stats = StatsCombiner; -#else - // Source for object allocations and metadata - // No separation between the two - using Stats = StatsRange; - using ObjectRange = SmallBuddyRange< - LargeBuddyRange, 21, 21, Pagemap>>; - using GlobalMetaRange = GlobalRange; -#endif - - struct LocalState - { - ObjectRange object_range; - -#ifdef SNMALLOC_META_PROTECTED - MetaRange meta_range; + using Pal = PAL; + using SlabMetadata = typename PagemapEntry::SlabMetadata; - MetaRange& get_meta_range() - { - return meta_range; - } -#else - ObjectRange& get_meta_range() - { - return object_range; - } -#endif - }; + static constexpr size_t SizeofMetadata = + bits::next_pow2_const(sizeof(SlabMetadata)); public: - template - static std::enable_if_t init() - { - static_assert(fixed_range_ == fixed_range, "Don't set SFINAE parameter!"); - - Pagemap::concretePagemap.init(); - } - - template - static std::enable_if_t init(void* base, size_t length) - { - static_assert(fixed_range_ == fixed_range, "Don't set SFINAE parameter!"); - - auto [heap_base, heap_length] = - Pagemap::concretePagemap.init(base, length); - - Pagemap::register_range(address_cast(heap_base), heap_length); - - // Push memory into the global range. - range_to_pow_2_blocks( - capptr::Chunk(heap_base), - heap_length, - [&](capptr::Chunk p, size_t sz, bool) { - GlobalR g; - g.dealloc_range(p, sz); - }); - } - /** * Provide a block of meta-data with size and align. 
* @@ -223,10 +39,14 @@ namespace snmalloc * does not avail itself of this degree of freedom. */ template - static capptr::Chunk + static capptr::Alloc alloc_meta_data(LocalState* local_state, size_t size) { - capptr::Chunk p; + capptr::Arena p; + + // Meta-data does not use our sizeclass machinery, so have Aal round up + size = Aal::capptr_size_round(size); + if (local_state != nullptr) { p = local_state->get_meta_range().alloc_range_with_leftover(size); @@ -241,9 +61,13 @@ namespace snmalloc } if (p == nullptr) + { errno = ENOMEM; + return nullptr; + } - return p; + return capptr_to_user_address_control( + Aal::capptr_bound(p, size)); } /** @@ -261,8 +85,7 @@ namespace snmalloc SNMALLOC_ASSERT(bits::is_pow2(size)); SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); - auto meta_cap = - local_state.get_meta_range().alloc_range(sizeof(SlabMetadata)); + auto meta_cap = local_state.get_meta_range().alloc_range(SizeofMetadata); auto meta = meta_cap.template as_reinterpret().unsafe_ptr(); @@ -272,29 +95,36 @@ namespace snmalloc return {nullptr, nullptr}; } - auto p = local_state.object_range.alloc_range(size); + capptr::Arena p = local_state.get_object_range()->alloc_range(size); #ifdef SNMALLOC_TRACING message<1024>("Alloc chunk: {} ({})", p.unsafe_ptr(), size); #endif if (p == nullptr) { - local_state.get_meta_range().dealloc_range( - meta_cap, sizeof(SlabMetadata)); + local_state.get_meta_range().dealloc_range(meta_cap, SizeofMetadata); errno = ENOMEM; #ifdef SNMALLOC_TRACING message<1024>("Out of memory"); #endif - return {p, nullptr}; + return {nullptr, nullptr}; } typename Pagemap::Entry t(meta, ras); Pagemap::set_metaentry(address_cast(p), size, t); - p = Aal::capptr_bound(p, size); - return {p, meta}; + return {Aal::capptr_bound(p, size), meta}; } + /** + * Deallocate a chunk of memory of size `size` and base `alloc`. + * The `slab_metadata` is the meta-data block associated with this + * chunk. The backend can recalculate this, but as the callee will + * already have it, we take it for possibly more optimal code. + * + * LocalState contains all the information about the various ranges + * that are used by the backend to manage the address space. + */ static void dealloc_chunk( LocalState& local_state, SlabMetadata& slab_metadata, @@ -319,14 +149,23 @@ namespace snmalloc Pagemap::get_metaentry(address_cast(alloc)).get_slab_metadata()); Pagemap::set_metaentry(address_cast(alloc), size, t); + /* + * On CHERI, the passed alloc has had its bounds narrowed to just the + * Chunk, and so we retrieve the Arena-bounded cap for use in the + * remainder of the backend. + */ + capptr::Arena arena = Authmap::amplify(alloc); + local_state.get_meta_range().dealloc_range( - capptr::Chunk(&slab_metadata), sizeof(SlabMetadata)); + capptr::Arena::unsafe_from(&slab_metadata), SizeofMetadata); + + local_state.get_object_range()->dealloc_range(arena, size); + } - // On non-CHERI platforms, we don't need to re-derive to get a pointer to - // the chunk. On CHERI platforms this will need to be stored in the - // SlabMetadata or similar. 
- capptr::Chunk chunk{alloc.unsafe_ptr()}; - local_state.object_range.dealloc_range(chunk, size); + template + SNMALLOC_FAST_PATH static const PagemapEntry& get_metaentry(address_t p) + { + return Pagemap::template get_metaentry(p); } static size_t get_current_usage() diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/base_constants.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/base_constants.h new file mode 100644 index 000000000000..92385a9601fd --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/base_constants.h @@ -0,0 +1,21 @@ + + +#pragma once + +#include "../backend/backend.h" + +namespace snmalloc +{ + /** + * Base range configuration contains common parts of other ranges. + */ + struct BaseLocalStateConstants + { + protected: + // Size of requests that the global cache should use + static constexpr size_t GlobalCacheSizeBits = 24; + + // Size of requests that the local cache should use + static constexpr size_t LocalCacheSizeBits = 21; + }; +} // namespace snmalloc \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h index 41b5026884af..c6784e703779 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/fixedglobalconfig.h @@ -1,21 +1,47 @@ #pragma once -#include "../backend/backend.h" +#include "../backend_helpers/backend_helpers.h" +#include "standard_range.h" namespace snmalloc { /** * A single fixed address range allocator configuration */ - template - class FixedGlobals final : public BackendAllocator + template + class FixedRangeConfig final : public CommonConfig { public: - using GlobalPoolState = PoolState>; + using PagemapEntry = DefaultPagemapEntry; private: - using Backend = BackendAllocator; + using ConcretePagemap = + FlatPagemap; + using Pagemap = BasicPagemap; + + struct Authmap + { + static inline capptr::Arena arena; + + template + static SNMALLOC_FAST_PATH capptr::Arena + amplify(capptr::Alloc c) + { + return Aal::capptr_rebound(arena, c); + } + }; + + public: + using LocalState = StandardLocalState; + + using GlobalPoolState = PoolState>; + + using Backend = + BackendAllocator; + using Pal = PAL; + + private: inline static GlobalPoolState alloc_pool; public: @@ -54,19 +80,39 @@ namespace snmalloc snmalloc::register_clean_up(); } - static void - init(typename Backend::LocalState* local_state, void* base, size_t length) + static void init(LocalState* local_state, void* base, size_t length) { UNUSED(local_state); - Backend::init(base, length); + + auto [heap_base, heap_length] = + Pagemap::concretePagemap.init(base, length); + + // Make this a alloc_config constant. + if (length < MIN_HEAP_SIZE_FOR_THREAD_LOCAL_BUDDY) + { + LocalState::set_small_heap(); + } + + Authmap::arena = capptr::Arena::unsafe_from(heap_base); + + Pagemap::register_range(Authmap::arena, heap_length); + + // Push memory into the global range. 
+ range_to_pow_2_blocks( + capptr::Arena::unsafe_from(heap_base), + heap_length, + [&](capptr::Arena p, size_t sz, bool) { + typename LocalState::GlobalR g; + g.dealloc_range(p, sz); + }); } /* Verify that a pointer points into the region managed by this config */ - template + template static SNMALLOC_FAST_PATH CapPtr< T, typename B::template with_wildness> - capptr_domesticate(typename Backend::LocalState* ls, CapPtr p) + capptr_domesticate(LocalState* ls, CapPtr p) { static_assert(B::wildness == capptr::dimension::Wildness::Wild); @@ -75,7 +121,7 @@ namespace snmalloc UNUSED(ls); auto address = address_cast(p); - auto [base, length] = Backend::Pagemap::get_bounds(); + auto [base, length] = Pagemap::get_bounds(); if ((address - base > (length - sz)) || (length < sz)) { return nullptr; @@ -83,8 +129,8 @@ namespace snmalloc return CapPtr< T, - typename B::template with_wildness>( - p.unsafe_ptr()); + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); } }; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h index 6e88f175161e..525c77275c89 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/globalconfig.h @@ -4,57 +4,98 @@ // `snmalloc.h` or consume the global allocation APIs. #ifndef SNMALLOC_PROVIDE_OWN_CONFIG -# include "../backend/backend.h" +# include "../backend_helpers/backend_helpers.h" +# include "backend.h" +# include "meta_protected_range.h" +# include "standard_range.h" namespace snmalloc { // Forward reference to thread local cleanup. void register_clean_up(); -# ifdef USE_SNMALLOC_STATS - inline static void print_stats() - { - printf("No Stats yet!"); - // Stats s; - // current_alloc_pool()->aggregate_stats(s); - // s.print(std::cout); - } -# endif - /** - * The default configuration for a global snmalloc. This allocates memory - * from the operating system and expects to manage memory anywhere in the - * address space. + * The default configuration for a global snmalloc. It contains all the + * datastructures to manage the memory from the OS. It had several internal + * public types for various aspects of the code. + * The most notable are: + * + * Backend - Manages the memory coming from the platform. + * LocalState - the per-thread/per-allocator state that may perform local + * caching of reserved memory. This also specifies the various Range types + * used to manage the memory. + * + * The Configuration sets up a Pagemap for the backend to use, and the state + * required to build new allocators (GlobalPoolState). */ - class Globals final : public BackendAllocator + class StandardConfig final : public CommonConfig { + using GlobalPoolState = PoolState>; + public: - using GlobalPoolState = PoolState>; + using Pal = DefaultPal; + using PagemapEntry = DefaultPagemapEntry; private: - using Backend = BackendAllocator; + using ConcretePagemap = + FlatPagemap; + + using Pagemap = BasicPagemap; + + using ConcreteAuthmap = + FlatPagemap(), capptr::Arena, Pal, false>; + using Authmap = DefaultAuthmap; + + /** + * This specifies where this configurations sources memory from and the + * pagemap (and authmap) that maintain metadata about underlying OS + * allocations. 
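The fixed-range init above seeds the global range via range_to_pow_2_blocks. A self-contained sketch of that decomposition (the real helper also takes a minimum-bits parameter, works on CapPtrs, and reports whether a block is the largest; all omitted here):

    #include <cstddef>
    #include <cstdint>

    // Carve [base, base + length) into naturally aligned power-of-two blocks.
    inline void to_pow2_blocks(
      uintptr_t base, size_t length, void (*cb)(uintptr_t addr, size_t size))
    {
      uintptr_t cursor = base;
      uintptr_t end = base + length;
      while (cursor < end)
      {
        size_t remain = static_cast<size_t>(end - cursor);
        // Largest power of two not exceeding the remaining length...
        size_t block = size_t(1) << (63 - __builtin_clzll(remain));
        // ...also bounded by the natural alignment of the cursor.
        size_t align = static_cast<size_t>(cursor & (~cursor + 1));
        if (cursor != 0 && align < block)
          block = align;
        cb(cursor, block);
        cursor += block;
      }
    }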
+ * @{ + */ + + using Base = Pipe< + PalRange, + PagemapRegisterRange, + PagemapRegisterRange>; + + /** + * @} + */ + public: + /** + * Use one of the default range configurations + */ + using LocalState = std::conditional_t< + mitigations(metadata_protection), + MetaProtectedRangeLocalState, + StandardLocalState>; + + /** + * Use the default backend. + */ + using Backend = + BackendAllocator; + + private: SNMALLOC_REQUIRE_CONSTINIT inline static GlobalPoolState alloc_pool; + /** + * Specifies if the Configuration has been initialised. + */ SNMALLOC_REQUIRE_CONSTINIT inline static std::atomic initialised{false}; + /** + * Used to prevent two threads attempting to initialise the configuration + */ SNMALLOC_REQUIRE_CONSTINIT inline static FlagWord initialisation_lock{}; - public: - static GlobalPoolState& pool() - { - return alloc_pool; - } - - static constexpr Flags Options{}; - // Performs initialisation for this configuration - // of allocators. Needs to be idempotent, - // and concurrency safe. - static void ensure_init() + // of allocators. + SNMALLOC_SLOW_PATH static void ensure_init_slow() { FlagLock lock{initialisation_lock}; # ifdef SNMALLOC_TRACING @@ -67,16 +108,44 @@ namespace snmalloc LocalEntropy entropy; entropy.init(); // Initialise key for remote deallocation lists - key_global = FreeListKey(entropy.get_free_list_key()); + RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key()); - // Need to initialise pagemap. - Backend::init(); + // Need to randomise pagemap location. If requested and not a + // StrictProvenance architecture, randomize its table's location within a + // significantly larger address space allocation. + static constexpr bool pagemap_randomize = + mitigations(random_pagemap) && !aal_supports; -# ifdef USE_SNMALLOC_STATS - atexit(snmalloc::print_stats); -# endif + Pagemap::concretePagemap.template init(); - initialised = true; + if constexpr (aal_supports) + { + Authmap::init(); + } + + initialised.store(true, std::memory_order_release); + } + + public: + /** + * Provides the state to create new allocators. + */ + static GlobalPoolState& pool() + { + return alloc_pool; + } + + static constexpr Flags Options{}; + + // Performs initialisation for this configuration + // of allocators. Needs to be idempotent, + // and concurrency safe. + SNMALLOC_FAST_PATH static void ensure_init() + { + if (SNMALLOC_LIKELY(initialised.load(std::memory_order_acquire))) + return; + + ensure_init_slow(); } static bool is_initialised() @@ -93,11 +162,10 @@ namespace snmalloc snmalloc::register_clean_up(); } }; -} // namespace snmalloc -// The default configuration for snmalloc -namespace snmalloc -{ - using Alloc = snmalloc::LocalAllocator; + /** + * Create allocator type for this configuration. + */ + using Alloc = snmalloc::LocalAllocator; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h new file mode 100644 index 000000000000..5c5795cc0589 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/meta_protected_range.h @@ -0,0 +1,129 @@ +#pragma once + +#include "../backend/backend.h" +#include "base_constants.h" + +namespace snmalloc +{ + /** + * Range that carefully ensures meta-data and object data cannot be in + * the same memory range. Once memory has is used for either meta-data + * or object data it can never be recycled to the other. + * + * This configuration also includes guard pages and randomisation. 
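The ensure_init()/ensure_init_slow() split above keeps the hot path to a single acquire load. A minimal analogue of that double-checked pattern, using std::mutex in place of snmalloc's FlagLock (names local to this sketch):

    #include <atomic>
    #include <mutex>

    std::atomic<bool> demo_initialised{false};
    std::mutex demo_init_lock;

    void demo_ensure_init_slow()
    {
      std::lock_guard<std::mutex> guard(demo_init_lock);
      if (demo_initialised.load(std::memory_order_acquire))
        return; // another thread won the race
      // ... one-time setup (keys, pagemap, ...) goes here ...
      demo_initialised.store(true, std::memory_order_release);
    }

    inline void demo_ensure_init()
    {
      if (demo_initialised.load(std::memory_order_acquire))
        return; // fast path: already initialised
      demo_ensure_init_slow();
    }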
+ * + * PAL is the underlying PAL that is used to Commit memory ranges. + * + * Base is where memory is sourced from. + * + * MinSizeBits is the minimum request size that can be passed to Base. + * On Windows this 16 as VirtualAlloc cannot reserve less than 64KiB. + * Alternative configurations might make this 2MiB so that huge pages + * can be used. + */ + template< + typename PAL, + typename Pagemap, + typename Base, + size_t MinSizeBits = MinBaseSizeBits()> + struct MetaProtectedRangeLocalState : BaseLocalStateConstants + { + private: + // Global range of memory + using GlobalR = Pipe< + Base, + LargeBuddyRange< + GlobalCacheSizeBits, + bits::BITS - 1, + Pagemap, + MinSizeBits>, + LogRange<2>, + GlobalRange>; + + static constexpr size_t page_size_bits = + bits::next_pow2_bits_const(PAL::page_size); + + static constexpr size_t max_page_chunk_size_bits = + bits::max(page_size_bits, MIN_CHUNK_BITS); + + // Central source of object-range, does not pass back to GlobalR as + // that would allow flows from Objects to Meta-data, and thus UAF + // would be able to corrupt meta-data. + using CentralObjectRange = Pipe< + GlobalR, + LargeBuddyRange, + LogRange<3>, + GlobalRange, + CommitRange, + StatsRange>; + + // Controls the padding around the meta-data range. + // The larger the padding range the more randomisation that + // can be used. + static constexpr size_t SubRangeRatioBits = 6; + + // Centralised source of meta-range + using CentralMetaRange = Pipe< + GlobalR, + SubRange, // Use SubRange to introduce guard + // pages. + LargeBuddyRange< + GlobalCacheSizeBits, + bits::BITS - 1, + Pagemap, + page_size_bits>, + CommitRange, + // In case of huge pages, we don't want to give each thread its own huge + // page, so commit in the global range. + LargeBuddyRange< + max_page_chunk_size_bits, + max_page_chunk_size_bits, + Pagemap, + page_size_bits>, + LogRange<4>, + GlobalRange, + StatsRange>; + + // Local caching of object range + using ObjectRange = Pipe< + CentralObjectRange, + LargeBuddyRange< + LocalCacheSizeBits, + LocalCacheSizeBits, + Pagemap, + page_size_bits>, + LogRange<5>>; + + // Local caching of meta-data range + using MetaRange = Pipe< + CentralMetaRange, + LargeBuddyRange< + LocalCacheSizeBits - SubRangeRatioBits, + bits::BITS - 1, + Pagemap>, + SmallBuddyRange>; + + ObjectRange object_range; + + MetaRange meta_range; + + public: + using Stats = StatsCombiner; + + ObjectRange* get_object_range() + { + return &object_range; + } + + MetaRange& get_meta_range() + { + return meta_range; + } + + // Create global range that can service small meta-data requests. + // Don't want to add the SmallBuddyRange to the CentralMetaRange as that + // would require committing memory inside the main global lock. + using GlobalMetaRange = + Pipe; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend/standard_range.h b/3rdparty/exported/snmalloc/src/snmalloc/backend/standard_range.h new file mode 100644 index 000000000000..78609ed2d0b3 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend/standard_range.h @@ -0,0 +1,95 @@ + + +#pragma once + +#include "../backend/backend.h" +#include "base_constants.h" + +namespace snmalloc +{ + /** + * Default configuration that does not provide any meta-data protection. + * + * PAL is the underlying PAL that is used to Commit memory ranges. + * + * Base is where memory is sourced from. + * + * MinSizeBits is the minimum request size that can be passed to Base. 
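The Pipe<...> chains used throughout these new range headers compose adapters left-to-right, each stage wrapping its parent's alloc_range/dealloc_range. A reduced, self-contained analogue of the idiom (the real adapters expose a nested Type over CapPtrs; names here are local to this sketch):

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>

    // Terminal range: actually provides memory.
    struct DemoBaseRange
    {
      void* alloc_range(size_t size) { return std::malloc(size); }
      void dealloc_range(void* p, size_t) { std::free(p); }
    };

    // Adapter: decorates its parent, here with logging.
    template<typename Parent>
    struct DemoLogRange
    {
      Parent parent{};

      void* alloc_range(size_t size)
      {
        void* p = parent.alloc_range(size);
        std::printf("alloc_range(%zu) -> %p\n", size, p);
        return p;
      }

      void dealloc_range(void* p, size_t size) { parent.dealloc_range(p, size); }
    };

    // Pipe<DemoBaseRange, DemoLogRange> reads in the same order; composed by hand:
    using DemoComposed = DemoLogRange<DemoBaseRange>;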
+ * On Windows this 16 as VirtualAlloc cannot reserve less than 64KiB. + * Alternative configurations might make this 2MiB so that huge pages + * can be used. + */ + template< + typename PAL, + typename Pagemap, + typename Base = EmptyRange<>, + size_t MinSizeBits = MinBaseSizeBits()> + struct StandardLocalState : BaseLocalStateConstants + { + // Global range of memory, expose this so can be filled by init. + using GlobalR = Pipe< + Base, + LargeBuddyRange< + GlobalCacheSizeBits, + bits::BITS - 1, + Pagemap, + MinSizeBits>, + LogRange<2>, + GlobalRange>; + + // Track stats of the committed memory + using Stats = Pipe, StatsRange>; + + private: + static constexpr size_t page_size_bits = + bits::next_pow2_bits_const(PAL::page_size); + + public: + // Source for object allocations and metadata + // Use buddy allocators to cache locally. + using LargeObjectRange = Pipe< + Stats, + StaticConditionalRange>>; + + private: + using ObjectRange = Pipe; + + ObjectRange object_range; + + public: + // Expose a global range for the initial allocation of meta-data. + using GlobalMetaRange = Pipe; + + /** + * Where we turn for allocations of user chunks. + * + * Reach over the SmallBuddyRange that's at the near end of the ObjectRange + * pipe, rather than having that range adapter dynamically branch to its + * parent. + */ + LargeObjectRange* get_object_range() + { + return object_range.template ancestor(); + } + + /** + * The backend has its own need for small objects without using the + * frontend allocators; this range manages those. + */ + ObjectRange& get_meta_range() + { + // Use the object range to service meta-data requests. + return object_range; + } + + static void set_small_heap() + { + // This disables the thread local caching of large objects. + LargeObjectRange::disable_range(); + } + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/authmap.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/authmap.h new file mode 100644 index 000000000000..9ad4f06dcc63 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/authmap.h @@ -0,0 +1,78 @@ +#pragma once + +#include "commonconfig.h" +#include "pagemapregisterrange.h" + +namespace snmalloc +{ + /** + * A dummy authmap that simply passes pointers through. For use on + * non-StrictProvenance architectures. + */ + struct DummyAuthmap + { + static SNMALLOC_FAST_PATH void init() {} + + static SNMALLOC_FAST_PATH void register_range(capptr::Arena, size_t) + {} + + template + static SNMALLOC_FAST_PATH capptr::Arena amplify(capptr::Alloc c) + { + return capptr::Arena::unsafe_from(c.unsafe_ptr()); + } + }; + + /** + * Wrap a concrete Pagemap to store Arena pointers, and use these when + * amplifying a pointer. 
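A self-contained analogue of the BasicAuthmap idea described above: record, per granule, a pointer with wider bounds ("Arena") so that a tightly bounded pointer can later be re-derived from it. std::map stands in for the flat pagemap; on CHERI the stored value is a capability and the re-derivation is Aal::capptr_rebound. All names here are local to this sketch:

    #include <cstddef>
    #include <cstdint>
    #include <map>

    constexpr uintptr_t GRANULE = uintptr_t(1) << 14; // illustrative granule size

    std::map<uintptr_t, void*> demo_authmap;

    void demo_register_range(void* arena, uintptr_t base, size_t size)
    {
      for (uintptr_t a = base; a < base + size; a += GRANULE)
        demo_authmap[a / GRANULE] = arena;
    }

    void* demo_amplify(uintptr_t addr)
    {
      auto it = demo_authmap.find(addr / GRANULE);
      return it == demo_authmap.end() ? nullptr : it->second;
    }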
+ */ + template + struct BasicAuthmap + { + static_assert( + std::is_same_v, typename ConcreteMap::EntryType>, + "BasicAuthmap's ConcreteMap must have capptr::Arena element type!"); + + private: + SNMALLOC_REQUIRE_CONSTINIT + static inline ConcreteMap concreteAuthmap; + + public: + static SNMALLOC_FAST_PATH void init() + { + concreteAuthmap.template init(); + } + + static SNMALLOC_FAST_PATH void + register_range(capptr::Arena base, size_t size) + { + concreteAuthmap.register_range(address_cast(base), size); + + address_t base_addr = address_cast(base); + for (address_t a = base_addr; a < base_addr + size; + a += ConcreteMap::GRANULARITY) + { + concreteAuthmap.set(a, base); + } + } + + template + static SNMALLOC_FAST_PATH capptr::Arena amplify(capptr::Alloc c) + { + return Aal::capptr_rebound( + concreteAuthmap.template get(address_cast(c)), + c); + } + }; + + /** + * Pick between the two above implementations based on StrictProvenance + */ + template + using DefaultAuthmap = std::conditional_t< + aal_supports, + BasicAuthmap, + DummyAuthmap>; + +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h index 94dfec2f2676..2104e681d53e 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/backend_helpers.h @@ -1,9 +1,12 @@ #include "../mem/mem.h" +#include "authmap.h" #include "buddy.h" #include "commitrange.h" #include "commonconfig.h" +#include "defaultpagemapentry.h" #include "empty_range.h" #include "globalrange.h" +#include "indirectrange.h" #include "largebuddyrange.h" #include "logrange.h" #include "pagemap.h" @@ -11,5 +14,6 @@ #include "palrange.h" #include "range_helpers.h" #include "smallbuddyrange.h" +#include "staticconditionalrange.h" #include "statsrange.h" #include "subrange.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h index 1c9b8e4af8d7..ff9416614dda 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/buddy.h @@ -16,12 +16,18 @@ namespace snmalloc class Buddy { std::array, MAX_SIZE_BITS - MIN_SIZE_BITS> trees; + // All RBtrees at or above this index should be empty. 
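The empty_at_or_above field introduced below lets remove_block() refuse immediately once it knows that no tree at or above the requested size class holds anything, instead of recursing up through empty levels. A tiny sketch of just that bookkeeping (tree storage elided; names local to this sketch):

    #include <cstddef>

    struct DemoBuddyIndex
    {
      static constexpr size_t MIN_SIZE_BITS = 12;

      // All trees with index >= empty_at_or_above are known to be empty.
      size_t empty_at_or_above = 0;

      static size_t to_index(size_t size_bits) { return size_bits - MIN_SIZE_BITS; }

      void note_added(size_t size_bits)
      {
        size_t idx = to_index(size_bits);
        if (idx + 1 > empty_at_or_above)
          empty_at_or_above = idx + 1;
      }

      bool may_satisfy(size_t size_bits) const
      {
        // Mirrors remove_block's early return of Rep::null.
        return to_index(size_bits) < empty_at_or_above;
      }
    };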
+ size_t empty_at_or_above = 0; size_t to_index(size_t size) { + SNMALLOC_ASSERT(size != 0); + SNMALLOC_ASSERT(bits::is_pow2(size)); auto log = snmalloc::bits::next_pow2_bits(size); - SNMALLOC_ASSERT(log >= MIN_SIZE_BITS); - SNMALLOC_ASSERT(log < MAX_SIZE_BITS); + SNMALLOC_ASSERT_MSG( + log >= MIN_SIZE_BITS, "Size too big: {} log {}.", size, log); + SNMALLOC_ASSERT_MSG( + log < MAX_SIZE_BITS, "Size too small: {} log {}.", size, log); return log - MIN_SIZE_BITS; } @@ -33,6 +39,16 @@ namespace snmalloc UNUSED(addr, size); } + void invariant() + { +#ifndef NDEBUG + for (size_t i = empty_at_or_above; i < trees.size(); i++) + { + SNMALLOC_ASSERT(trees[i].is_empty()); + } +#endif + } + public: constexpr Buddy() = default; /** @@ -48,6 +64,7 @@ namespace snmalloc typename Rep::Contents add_block(typename Rep::Contents addr, size_t size) { auto idx = to_index(size); + empty_at_or_above = bits::max(empty_at_or_above, idx + 1); validate_block(addr, size); @@ -70,8 +87,13 @@ namespace snmalloc size *= 2; addr = Rep::align_down(addr, size); if (size == bits::one_at_bit(MAX_SIZE_BITS)) + { + // Invariant should be checked on all non-tail return paths. + // Holds trivially here with current design. + invariant(); // Too big for this buddy allocator. return addr; + } return add_block(addr, size); } @@ -83,6 +105,7 @@ namespace snmalloc trees[idx].find(path, addr); } trees[idx].insert_path(path, addr); + invariant(); return Rep::null; } @@ -93,7 +116,10 @@ namespace snmalloc */ typename Rep::Contents remove_block(size_t size) { + invariant(); auto idx = to_index(size); + if (idx >= empty_at_or_above) + return Rep::null; auto addr = trees[idx].remove_min(); if (addr != Rep::null) @@ -108,7 +134,11 @@ namespace snmalloc auto bigger = remove_block(size * 2); if (bigger == Rep::null) + { + empty_at_or_above = idx; + invariant(); return Rep::null; + } auto second = Rep::offset(bigger, size); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commitrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commitrange.h index d2e6d54ff9e8..2bbd7a583062 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commitrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commitrange.h @@ -1,33 +1,53 @@ #pragma once - #include "../pal/pal.h" +#include "empty_range.h" +#include "range_helpers.h" namespace snmalloc { - template - class CommitRange + template + struct CommitRange { - ParentRange parent{}; + template + class Type : public ContainsParent + { + using ContainsParent::parent; - public: - static constexpr bool Aligned = ParentRange::Aligned; + public: + static constexpr bool Aligned = ParentRange::Aligned; - static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; - constexpr CommitRange() = default; + using ChunkBounds = typename ParentRange::ChunkBounds; + static_assert( + ChunkBounds::address_space_control == + capptr::dimension::AddressSpaceControl::Full); - capptr::Chunk alloc_range(size_t size) - { - auto range = parent.alloc_range(size); - if (range != nullptr) - PAL::template notify_using(range.unsafe_ptr(), size); - return range; - } + constexpr Type() = default; - void dealloc_range(capptr::Chunk base, size_t size) - { - PAL::notify_not_using(base.unsafe_ptr(), size); - parent.dealloc_range(base, size); - } + CapPtr alloc_range(size_t size) + { + SNMALLOC_ASSERT_MSG( + (size % PAL::page_size) == 0, + "size ({}) must be a multiple of page size ({})", + size, + 
PAL::page_size); + auto range = parent.alloc_range(size); + if (range != nullptr) + PAL::template notify_using(range.unsafe_ptr(), size); + return range; + } + + void dealloc_range(CapPtr base, size_t size) + { + SNMALLOC_ASSERT_MSG( + (size % PAL::page_size) == 0, + "size ({}) must be a multiple of page size ({})", + size, + PAL::page_size); + PAL::notify_not_using(base.unsafe_ptr(), size); + parent.dealloc_range(base, size); + } + }; }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h index aca6103f73e8..a69b6a3897d4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/commonconfig.h @@ -15,14 +15,14 @@ namespace snmalloc * this as a field of the global object: * * ```c++ - * constexpr static snmalloc::Flags Options{}; + * static constexpr snmalloc::Flags Options{}; * ``` * * A global configuration that wished to use out-of-line message queues but * accept the defaults for everything else would instead do this: * * ```c++ - * constexpr static snmalloc::Flags Options{.IsQueueInline = false}; + * static constexpr snmalloc::Flags Options{.IsQueueInline = false}; * ``` * * To maintain backwards source compatibility in future versions, any new @@ -113,5 +113,17 @@ namespace snmalloc inline static RemoteAllocator unused_remote; }; + template + static constexpr size_t MinBaseSizeBits() + { + if constexpr (pal_supports) + { + return bits::next_pow2_bits_const(PAL::minimum_alloc_size); + } + else + { + return MIN_CHUNK_BITS; + } + } } // namespace snmalloc #include "../mem/remotecache.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h new file mode 100644 index 000000000000..2083db30eb08 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/defaultpagemapentry.h @@ -0,0 +1,72 @@ +#pragma once + +#include "../mem/mem.h" + +namespace snmalloc +{ + /** + * Example of type stored in the pagemap. + * The following class could be replaced by: + * + * ``` + * using DefaultPagemapEntry = FrontendMetaEntry; + * ``` + * + * The full form here provides an example of how to extend the pagemap + * entries. It also guarantees that the front end never directly + * constructs meta entries, it only ever reads them or modifies them in + * place. + */ + template + class DefaultPagemapEntryT : public FrontendMetaEntry + { + /** + * The private initialising constructor is usable only by this back end. + */ + template< + SNMALLOC_CONCEPT(IsPAL) A1, + typename A2, + typename A3, + typename A4, + typename A5> + friend class BackendAllocator; + + /** + * The private default constructor is usable only by the pagemap. + */ + template + friend class FlatPagemap; + + /** + * The only constructor that creates newly initialised meta entries. + * This is callable only by the back end. The front end may copy, + * query, and update these entries, but it may not create them + * directly. This contract allows the back end to store any arbitrary + * metadata in meta entries when they are first constructed. + */ + SNMALLOC_FAST_PATH + DefaultPagemapEntryT(SlabMetadata* meta, uintptr_t ras) + : FrontendMetaEntry(meta, ras) + {} + + /** + * Copy assignment is used only by the pagemap. 
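// Sketch for illustration only (not snmalloc code): the constructor and
// assignment visibility rules above amount to "only the back end may create
// a freshly initialised entry; the pagemap may default-construct and copy;
// everyone else may only read". A minimal stand-alone illustration of that
// friend pattern (Backend, Pagemap and Entry are hypothetical names):
#include <cstdint>

namespace entry_friend_sketch
{
  class Backend; // the only creator of initialised entries

  template<typename T>
  class Pagemap; // may default-construct and copy entries

  class Entry
  {
    std::uintptr_t meta{0};

    // Initialising constructor: back end only.
    explicit Entry(std::uintptr_t m) : meta(m) {}
    friend class Backend;

    // Default construction and copy-assignment: pagemap only.
    Entry() = default;
    Entry& operator=(const Entry&) = default;
    template<typename T>
    friend class Pagemap;

  public:
    std::uintptr_t get_meta() const
    {
      return meta; // reading is open to everyone
    }
  };
} // namespace entry_friend_sketch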
+ */ + DefaultPagemapEntryT& operator=(const DefaultPagemapEntryT& other) + { + FrontendMetaEntry::operator=(other); + return *this; + } + + /** + * Default constructor. This must be callable from the pagemap. + */ + SNMALLOC_FAST_PATH DefaultPagemapEntryT() = default; + }; + + class DefaultSlabMetadata : public FrontendSlabMetadata + {}; + + using DefaultPagemapEntry = DefaultPagemapEntryT; + +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/empty_range.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/empty_range.h index 6507a01e3f0f..db91b9dc18a5 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/empty_range.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/empty_range.h @@ -3,6 +3,7 @@ namespace snmalloc { + template class EmptyRange { public: @@ -10,9 +11,11 @@ namespace snmalloc static constexpr bool ConcurrencySafe = true; + using ChunkBounds = B; + constexpr EmptyRange() = default; - capptr::Chunk alloc_range(size_t) + CapPtr alloc_range(size_t) { return nullptr; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/globalrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/globalrange.h index b21d4d08b358..b6fdbef8bcbf 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/globalrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/globalrange.h @@ -1,6 +1,9 @@ #pragma once #include "../ds/ds.h" +#include "empty_range.h" +#include "lockrange.h" +#include "staticrange.h" namespace snmalloc { @@ -8,34 +11,10 @@ namespace snmalloc * Makes the supplied ParentRange into a global variable, * and protects access with a lock. */ - template - class GlobalRange + struct GlobalRange { - SNMALLOC_REQUIRE_CONSTINIT static inline ParentRange parent{}; - - /** - * This is infrequently used code, a spin lock simplifies the code - * considerably, and should never be on the fast path. - */ - SNMALLOC_REQUIRE_CONSTINIT static inline FlagWord spin_lock{}; - - public: - static constexpr bool Aligned = ParentRange::Aligned; - - static constexpr bool ConcurrencySafe = true; - - constexpr GlobalRange() = default; - - capptr::Chunk alloc_range(size_t size) - { - FlagLock lock(spin_lock); - return parent.alloc_range(size); - } - - void dealloc_range(capptr::Chunk base, size_t size) - { - FlagLock lock(spin_lock); - parent.dealloc_range(base, size); - } + template> + class Type : public Pipe + {}; }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/indirectrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/indirectrange.h new file mode 100644 index 000000000000..932c3b2b4c49 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/indirectrange.h @@ -0,0 +1,54 @@ +#pragma once + +#include "../ds/ds.h" +#include "empty_range.h" + +namespace snmalloc +{ + /** + * Stores a references to the parent range so that it can be shared + * without `static` scope. + * + * This could be used to allow multiple allocators on a single region of + * memory. 
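// Sketch for illustration only (not snmalloc code): IndirectRange stores a
// pointer to its parent rather than containing it or making it static, so
// several independent front ends can be pointed at one shared region. The
// real IndirectRange additionally static_asserts that its parent is
// concurrency safe; this stand-alone miniature omits locking entirely and
// uses hypothetical names throughout:
#include <cstddef>

namespace indirect_sketch
{
  struct SharedRegion
  {
    char* cursor;
    char* end;

    void* alloc_range(std::size_t size)
    {
      if (static_cast<std::size_t>(end - cursor) < size)
        return nullptr;
      void* r = cursor;
      cursor += size;
      return r;
    }
  };

  struct Indirect
  {
    SharedRegion* parent{nullptr};

    void set_parent(SharedRegion* p)
    {
      parent = p;
    }

    void* alloc_range(std::size_t size)
    {
      return parent->alloc_range(size);
    }
  };

  // Usage: two Indirect instances draw from the same region.
  //   SharedRegion region{buf, buf + sizeof(buf)};
  //   Indirect a, b;
  //   a.set_parent(&region);
  //   b.set_parent(&region);
} // namespace indirect_sketch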
+ */ + struct IndirectRange + { + template> + class Type : public RefParent + { + using RefParent::parent; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static_assert( + ParentRange::ConcurrencySafe, + "IndirectRange requires a concurrency safe parent."); + + static constexpr bool ConcurrencySafe = true; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + return parent->alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + parent->dealloc_range(base, size); + } + + /** + * Points the parent reference to the given range. + */ + void set_parent(ParentRange* p) + { + parent = p; + } + }; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h index b81e392efc75..d1446d725fc2 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/largebuddyrange.h @@ -3,6 +3,7 @@ #include "../ds/ds.h" #include "../mem/mem.h" #include "buddy.h" +#include "empty_range.h" #include "range_helpers.h" #include @@ -12,7 +13,7 @@ namespace snmalloc /** * Class for using the pagemap entries for the buddy allocator. */ - template + template class BuddyChunkRep { public: @@ -183,14 +184,17 @@ namespace snmalloc * for */ template< - typename ParentRange, size_t REFILL_SIZE_BITS, size_t MAX_SIZE_BITS, - SNMALLOC_CONCEPT(ConceptBuddyRangeMeta) Pagemap, + SNMALLOC_CONCEPT(IsWritablePagemap) Pagemap, size_t MIN_REFILL_SIZE_BITS = 0> class LargeBuddyRange { - ParentRange parent{}; + static_assert( + REFILL_SIZE_BITS <= MAX_SIZE_BITS, "REFILL_SIZE_BITS > MAX_SIZE_BITS"); + static_assert( + MIN_REFILL_SIZE_BITS <= REFILL_SIZE_BITS, + "MIN_REFILL_SIZE_BITS > REFILL_SIZE_BITS"); /** * Maximum size of a refill @@ -203,175 +207,189 @@ namespace snmalloc static constexpr size_t MIN_REFILL_SIZE = bits::one_at_bit(MIN_REFILL_SIZE_BITS); - /** - * The size of memory requested so far. - * - * This is used to determine the refill size. - */ - size_t requested_total = 0; - - /** - * Buddy allocator used to represent this range of memory. - */ - Buddy, MIN_CHUNK_BITS, MAX_SIZE_BITS> buddy_large; - - /** - * The parent might not support deallocation if this buddy allocator covers - * the whole range. Uses template insanity to make this work. - */ - template - std::enable_if_t - parent_dealloc_range(capptr::Chunk base, size_t size) + public: + template> + class Type : public ContainsParent { - static_assert( - MAX_SIZE_BITS != (bits::BITS - 1), "Don't set SFINAE parameter"); - parent.dealloc_range(base, size); - } + using ContainsParent::parent; + + /** + * The size of memory requested so far. + * + * This is used to determine the refill size. + */ + size_t requested_total = 0; + + /** + * Buddy allocator used to represent this range of memory. + */ + Buddy, MIN_CHUNK_BITS, MAX_SIZE_BITS> buddy_large; + + /** + * The parent might not support deallocation if this buddy allocator + * covers the whole range. Uses template insanity to make this work. 
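// Sketch for illustration only (not snmalloc code): the "template insanity"
// mentioned above is the usual trick of giving the member its own template
// parameter that defaults to the class-level condition, so enable_if has a
// dependent expression to inspect and the member simply drops out of
// overload resolution when the buddy allocator covers the whole address
// range. A stand-alone illustration (Range and ADDRESS_BITS are hypothetical
// stand-ins for the real constants):
#include <cstddef>
#include <type_traits>

namespace sfinae_sketch
{
  template<std::size_t MAX_SIZE_BITS, std::size_t ADDRESS_BITS>
  struct Range
  {
    // Only callable when this range does NOT cover the whole address space.
    template<bool Partial = (MAX_SIZE_BITS != ADDRESS_BITS)>
    std::enable_if_t<Partial> parent_dealloc_range(void*, std::size_t)
    {
      static_assert(
        MAX_SIZE_BITS != ADDRESS_BITS, "Don't set SFINAE parameter");
      // ... hand the block back to the parent range ...
    }
  };

  // Range<30, 48> r; r.parent_dealloc_range(p, s);  // compiles
  // Range<48, 48> r; r.parent_dealloc_range(p, s);  // substitution failure,
  //                                                 // so the call is rejected
} // namespace sfinae_sketch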
+ */ + template + std::enable_if_t + parent_dealloc_range(capptr::Arena base, size_t size) + { + static_assert( + MAX_SIZE_BITS != (bits::BITS - 1), "Don't set SFINAE parameter"); + parent.dealloc_range(base, size); + } - void dealloc_overflow(capptr::Chunk overflow) - { - if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) + void dealloc_overflow(capptr::Arena overflow) { - if (overflow != nullptr) + if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) { - parent.dealloc_range(overflow, bits::one_at_bit(MAX_SIZE_BITS)); + if (overflow != nullptr) + { + parent.dealloc_range(overflow, bits::one_at_bit(MAX_SIZE_BITS)); + } + } + else + { + if (overflow != nullptr) + abort(); } } - else + + /** + * Add a range of memory to the address space. + * Divides blocks into power of two sizes with natural alignment + */ + void add_range(capptr::Arena base, size_t length) { - if (overflow != nullptr) - abort(); + range_to_pow_2_blocks( + base, length, [this](capptr::Arena base, size_t align, bool) { + auto overflow = + capptr::Arena::unsafe_from(reinterpret_cast( + buddy_large.add_block(base.unsafe_uintptr(), align))); + + dealloc_overflow(overflow); + }); } - } - - /** - * Add a range of memory to the address space. - * Divides blocks into power of two sizes with natural alignment - */ - void add_range(capptr::Chunk base, size_t length) - { - range_to_pow_2_blocks( - base, length, [this](capptr::Chunk base, size_t align, bool) { - auto overflow = capptr::Chunk(reinterpret_cast( - buddy_large.add_block(base.unsafe_uintptr(), align))); - - dealloc_overflow(overflow); - }); - } - capptr::Chunk refill(size_t size) - { - if (ParentRange::Aligned) + capptr::Arena refill(size_t size) { - // Use amount currently requested to determine refill size. - // This will gradually increase the usage of the parent range. - // So small examples can grow local caches slowly, and larger - // examples will grow them by the refill size. - // - // The heuristic is designed to allocate the following sequence for - // 16KiB requests 16KiB, 16KiB, 32Kib, 64KiB, ..., REFILL_SIZE/2, - // REFILL_SIZE, REFILL_SIZE, ... Hence if this if they are coming from a - // contiguous aligned range, then they could be consolidated. This - // depends on the ParentRange behaviour. - size_t refill_size = bits::min(REFILL_SIZE, requested_total); - refill_size = bits::max(refill_size, MIN_REFILL_SIZE); - refill_size = bits::max(refill_size, size); - refill_size = bits::next_pow2(refill_size); - - auto refill_range = parent.alloc_range(refill_size); - if (refill_range != nullptr) + if (ParentRange::Aligned) { - requested_total += refill_size; - add_range(pointer_offset(refill_range, size), refill_size - size); + // Use amount currently requested to determine refill size. + // This will gradually increase the usage of the parent range. + // So small examples can grow local caches slowly, and larger + // examples will grow them by the refill size. + // + // The heuristic is designed to allocate the following sequence for + // 16KiB requests 16KiB, 16KiB, 32Kib, 64KiB, ..., REFILL_SIZE/2, + // REFILL_SIZE, REFILL_SIZE, ... Hence if this if they are coming from + // a contiguous aligned range, then they could be consolidated. This + // depends on the ParentRange behaviour. 
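// Sketch for illustration only (not snmalloc code): the refill heuristic
// described in the comment above can be read as a small pure function of
// (bytes requested so far, request size). A stand-alone C++20 version with a
// worked trace, assuming MIN_REFILL_SIZE == 1 and REFILL_SIZE == 1 MiB (both
// values are illustrative only):
#include <algorithm>
#include <bit>
#include <cstddef>

namespace refill_sketch
{
  constexpr std::size_t REFILL_SIZE = std::size_t{1} << 20;
  constexpr std::size_t MIN_REFILL_SIZE = 1;

  constexpr std::size_t
  next_refill(std::size_t requested_total, std::size_t size)
  {
    std::size_t r = std::min(REFILL_SIZE, requested_total);
    r = std::max(r, MIN_REFILL_SIZE);
    r = std::max(r, size);
    return std::bit_ceil(r); // round up to a power of two
  }

  // For a stream of 16 KiB requests (each refill fully consumed before the
  // next), requested_total grows 0, 16K, 32K, 64K, ... and next_refill
  // returns 16K, 16K, 32K, 64K, ..., REFILL_SIZE/2, REFILL_SIZE, REFILL_SIZE:
  static_assert(next_refill(0, 16 * 1024) == 16 * 1024);
  static_assert(next_refill(16 * 1024, 16 * 1024) == 16 * 1024);
  static_assert(next_refill(32 * 1024, 16 * 1024) == 32 * 1024);
  static_assert(next_refill(64 * 1024, 16 * 1024) == 64 * 1024);
  static_assert(next_refill(2 * REFILL_SIZE, 16 * 1024) == REFILL_SIZE);
} // namespace refill_sketch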
+ size_t refill_size = bits::min(REFILL_SIZE, requested_total); + refill_size = bits::max(refill_size, MIN_REFILL_SIZE); + refill_size = bits::max(refill_size, size); + refill_size = bits::next_pow2(refill_size); + + auto refill_range = parent.alloc_range(refill_size); + if (refill_range != nullptr) + { + requested_total += refill_size; + add_range(pointer_offset(refill_range, size), refill_size - size); + } + return refill_range; } - return refill_range; - } - // Note the unaligned parent path does not use - // requested_total in the heuristic for the initial size - // this is because the request needs to introduce alignment. - // Currently the unaligned variant is not used as a local cache. - // So the gradual growing of refill_size is not needed. + // Note the unaligned parent path does not use + // requested_total in the heuristic for the initial size + // this is because the request needs to introduce alignment. + // Currently the unaligned variant is not used as a local cache. + // So the gradual growing of refill_size is not needed. - // Need to overallocate to get the alignment right. - bool overflow = false; - size_t needed_size = bits::umul(size, 2, overflow); - if (overflow) - { - return nullptr; - } - - auto refill_size = bits::max(needed_size, REFILL_SIZE); - while (needed_size <= refill_size) - { - auto refill = parent.alloc_range(refill_size); + // Need to overallocate to get the alignment right. + bool overflow = false; + size_t needed_size = bits::umul(size, 2, overflow); + if (overflow) + { + return nullptr; + } - if (refill != nullptr) + auto refill_size = bits::max(needed_size, REFILL_SIZE); + while (needed_size <= refill_size) { - requested_total += refill_size; - add_range(refill, refill_size); + auto refill = parent.alloc_range(refill_size); + + if (refill != nullptr) + { + requested_total += refill_size; + add_range(refill, refill_size); - SNMALLOC_ASSERT(refill_size < bits::one_at_bit(MAX_SIZE_BITS)); - static_assert( - (REFILL_SIZE < bits::one_at_bit(MAX_SIZE_BITS)) || - ParentRange::Aligned, - "Required to prevent overflow."); + SNMALLOC_ASSERT(refill_size < bits::one_at_bit(MAX_SIZE_BITS)); + static_assert( + (REFILL_SIZE < bits::one_at_bit(MAX_SIZE_BITS)) || + ParentRange::Aligned, + "Required to prevent overflow."); - return alloc_range(size); + return alloc_range(size); + } + + refill_size >>= 1; } - refill_size >>= 1; + return nullptr; } - return nullptr; - } + public: + static constexpr bool Aligned = true; - public: - static constexpr bool Aligned = true; + static constexpr bool ConcurrencySafe = false; - static constexpr bool ConcurrencySafe = false; - - constexpr LargeBuddyRange() = default; + /* The large buddy allocator always deals in Arena-bounded pointers. 
*/ + using ChunkBounds = capptr::bounds::Arena; + static_assert( + std::is_same_v); - capptr::Chunk alloc_range(size_t size) - { - SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); - SNMALLOC_ASSERT(bits::is_pow2(size)); + constexpr Type() = default; - if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + capptr::Arena alloc_range(size_t size) { - if (ParentRange::Aligned) - return parent.alloc_range(size); + SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); + SNMALLOC_ASSERT(bits::is_pow2(size)); - return nullptr; - } + if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + { + if (ParentRange::Aligned) + return parent.alloc_range(size); - auto result = capptr::Chunk( - reinterpret_cast(buddy_large.remove_block(size))); + return nullptr; + } - if (result != nullptr) - return result; + auto result = capptr::Arena::unsafe_from( + reinterpret_cast(buddy_large.remove_block(size))); - return refill(size); - } + if (result != nullptr) + return result; - void dealloc_range(capptr::Chunk base, size_t size) - { - SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); - SNMALLOC_ASSERT(bits::is_pow2(size)); + return refill(size); + } - if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) + void dealloc_range(capptr::Arena base, size_t size) { - if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + SNMALLOC_ASSERT(size >= MIN_CHUNK_SIZE); + SNMALLOC_ASSERT(bits::is_pow2(size)); + + if constexpr (MAX_SIZE_BITS != (bits::BITS - 1)) { - parent_dealloc_range(base, size); - return; + if (size >= (bits::one_at_bit(MAX_SIZE_BITS) - 1)) + { + parent_dealloc_range(base, size); + return; + } } - } - auto overflow = capptr::Chunk(reinterpret_cast( - buddy_large.add_block(base.unsafe_uintptr(), size))); - dealloc_overflow(overflow); - } + auto overflow = + capptr::Arena::unsafe_from(reinterpret_cast( + buddy_large.add_block(base.unsafe_uintptr(), size))); + dealloc_overflow(overflow); + } + }; }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h new file mode 100644 index 000000000000..ce91711cce4c --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/lockrange.h @@ -0,0 +1,49 @@ +#pragma once + +#include "../ds/ds.h" +#include "empty_range.h" + +namespace snmalloc +{ + /** + * Protect the ParentRange with a spin lock. + * + * Accesses via the ancestor() mechanism will bypass the lock and so + * should be used only where the resulting data races are acceptable. + */ + struct LockRange + { + template> + class Type : public ContainsParent + { + using ContainsParent::parent; + + /** + * This is infrequently used code, a spin lock simplifies the code + * considerably, and should never be on the fast path. 
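// Sketch for illustration only (not snmalloc code): LockRange wraps every
// call to its parent in a FlagLock over a FlagWord. The shape is an ordinary
// RAII spin lock; a stand-alone equivalent over std::atomic<bool> (the same
// primitive the ABA helper later in this patch switches to) looks like this:
#include <atomic>
#include <cstddef>

namespace spin_lock_sketch
{
  class SpinGuard
  {
    std::atomic<bool>& flag;

  public:
    explicit SpinGuard(std::atomic<bool>& f) : flag(f)
    {
      // exchange(true, acquire) spins until we observe the flag clear.
      while (flag.exchange(true, std::memory_order_acquire))
      {
        // A real implementation would pause/yield here (e.g. Aal::pause()).
      }
    }

    ~SpinGuard()
    {
      flag.store(false, std::memory_order_release);
    }
  };

  inline std::atomic<bool> range_lock{false};

  template<typename Parent>
  void* locked_alloc(Parent& parent, std::size_t size)
  {
    SpinGuard g(range_lock); // released automatically on every return path
    return parent.alloc_range(size);
  }
} // namespace spin_lock_sketch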
+ */ + FlagWord spin_lock{}; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + static constexpr bool ConcurrencySafe = true; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + FlagLock lock(spin_lock); + return parent.alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + FlagLock lock(spin_lock); + parent.dealloc_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/logrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/logrange.h index 432e8772cc36..0a3f907de4dc 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/logrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/logrange.h @@ -1,5 +1,8 @@ #pragma once +#include "empty_range.h" +#include "range_helpers.h" + namespace snmalloc { /** @@ -8,45 +11,51 @@ namespace snmalloc * * ParentRange is what the range is logging calls to. */ - template - class LogRange + template + struct LogRange { - ParentRange parent{}; + template> + class Type : public ContainsParent + { + using ContainsParent::parent; - public: - static constexpr bool Aligned = ParentRange::Aligned; + public: + static constexpr bool Aligned = ParentRange::Aligned; - static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; - constexpr LogRange() = default; + using ChunkBounds = typename ParentRange::ChunkBounds; - capptr::Chunk alloc_range(size_t size) - { + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { #ifdef SNMALLOC_TRACING - message<1024>("Call alloc_range({}) on {}", size, RangeName); + message<1024>("Call alloc_range({}) on {}", size, RangeName); #endif - auto range = parent.alloc_range(size); + auto range = parent.alloc_range(size); #ifdef SNMALLOC_TRACING - message<1024>( - "{} = alloc_range({}) in {}", range.unsafe_ptr(), size, RangeName); + message<1024>( + "{} = alloc_range({}) in {}", range.unsafe_ptr(), size, RangeName); #endif - return range; - } + return range; + } - void dealloc_range(capptr::Chunk base, size_t size) - { + void dealloc_range(CapPtr base, size_t size) + { #ifdef SNMALLOC_TRACING - message<1024>( - "dealloc_range({}, {}}) on {}", base.unsafe_ptr(), size, RangeName); + message<1024>( + "dealloc_range({}, {}}) on {}", base.unsafe_ptr(), size, RangeName); #endif - parent.dealloc_range(base, size); + parent.dealloc_range(base, size); #ifdef SNMALLOC_TRACING - message<1024>( - "Done dealloc_range({}, {}})! on {}", - base.unsafe_ptr(), - size, - RangeName); + message<1024>( + "Done dealloc_range({}, {}})! on {}", + base.unsafe_ptr(), + size, + RangeName); #endif - } + } + }; }; -} // namespace snmalloc \ No newline at end of file +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemap.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemap.h index 15ff51cfc40c..2e118484f649 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemap.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemap.h @@ -8,336 +8,17 @@ namespace snmalloc { - /** - * Simple pagemap that for each GRANULARITY_BITS of the address range - * stores a T. - */ - template - class FlatPagemap - { - private: - static constexpr size_t SHIFT = GRANULARITY_BITS; - - /** - * Before init is called will contain a single entry - * that is the default value. 
This is needed so that - * various calls do not have to check for nullptr. - * free(nullptr) - * and - * malloc_usable_size(nullptr) - * do not require an allocation to have ocurred before - * they are called. - */ - inline static const T default_value{}; - - /** - * The representation of the page map. - * - * Initially a single element to ensure nullptr operations - * work. - */ - T* body{const_cast(&default_value)}; - - /** - * The representation of the pagemap, but nullptr if it has not been - * initialised. Used to combine init checking and lookup. - */ - T* body_opt{nullptr}; - - /** - * If `has_bounds` is set, then these should contain the - * bounds of the heap that is being managed by this pagemap. - */ - address_t base{0}; - size_t size{0}; - - public: - /** - * Ensure this range of pagemap is accessible - */ - void register_range(address_t p, size_t length) - { - // Calculate range in pagemap that is associated to this space. - auto first = &body[p >> SHIFT]; - auto last = &body[(p + length + bits::one_at_bit(SHIFT) - 1) >> SHIFT]; - - // Commit OS pages associated to the range. - auto page_start = pointer_align_down(first); - auto page_end = pointer_align_up(last); - size_t using_size = pointer_diff(page_start, page_end); - PAL::template notify_using(page_start, using_size); - } - - constexpr FlatPagemap() = default; - - /** - * For pagemaps that cover an entire fixed address space, return the size - * that they must be. This allows the caller to allocate the correct - * amount of memory to be passed to `init`. This is not available for - * fixed-range pagemaps, whose size depends on dynamic configuration. - */ - template - static constexpr std::enable_if_t required_size() - { - static_assert( - has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); - constexpr size_t COVERED_BITS = PAL::address_bits - GRANULARITY_BITS; - constexpr size_t ENTRIES = bits::one_at_bit(COVERED_BITS); - return ENTRIES * sizeof(T); - } - - /** - * Initialise with pre-allocated memory. - * - * This is currently disabled for bounded pagemaps but may be reenabled if - * `required_size` is enabled for the has-bounds case. - */ - template - std::enable_if_t init(T* address) - { - static_assert( - has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); - body = address; - body_opt = address; - } - - /** - * Initialise the pagemap with bounds. - * - * Returns usable range after pagemap has been allocated - */ - template - std::enable_if_t> - init(void* b, size_t s) - { - static_assert( - has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); -#ifdef SNMALLOC_TRACING - message<1024>("Pagemap.init {} ({})", b, s); -#endif - SNMALLOC_ASSERT(s != 0); - // TODO take account of pagemap size in the calculation of how big it - // needs to be. - - // Align the start and end. We won't store for the very ends as they - // are not aligned to a chunk boundary. - auto heap_base = pointer_align_up(b, bits::one_at_bit(GRANULARITY_BITS)); - auto end = pointer_align_down( - pointer_offset(b, s), bits::one_at_bit(GRANULARITY_BITS)); - size = pointer_diff(heap_base, end); - - // Put pagemap at start of range. - // TODO CHERI capability bound here! - body = static_cast(b); - body_opt = body; - // Advance by size of pagemap. - // Note that base needs to be aligned to GRANULARITY for the rest of the - // code to work - // TODO CHERI capability bound here! 
- heap_base = pointer_align_up( - pointer_offset(b, (size >> SHIFT) * sizeof(T)), - bits::one_at_bit(GRANULARITY_BITS)); - base = address_cast(heap_base); - SNMALLOC_ASSERT( - base == bits::align_up(base, bits::one_at_bit(GRANULARITY_BITS))); - - return {heap_base, pointer_diff(heap_base, end)}; - } - - /** - * Initialise the pagemap without bounds. - */ - template - std::enable_if_t init() - { - static_assert( - has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); - static constexpr size_t REQUIRED_SIZE = required_size(); - -#ifdef SNMALLOC_CHECK_CLIENT - // Allocate a power of two extra to allow the placement of the - // pagemap be difficult to guess. - size_t additional_size = bits::next_pow2(REQUIRED_SIZE) * 4; - size_t request_size = REQUIRED_SIZE + additional_size; -#else - size_t request_size = REQUIRED_SIZE; -#endif - - auto new_body_untyped = PAL::reserve(request_size); - - if (new_body_untyped == nullptr) - { - PAL::error("Failed to initialise snmalloc."); - } - -#ifdef SNMALLOC_CHECK_CLIENT - // Begin pagemap at random offset within the additionally allocated space. - static_assert(bits::is_pow2(sizeof(T)), "Next line assumes this."); - size_t offset = get_entropy64() & (additional_size - sizeof(T)); - auto new_body = pointer_offset(new_body_untyped, offset); - - if constexpr (pal_supports) - { - void* start_page = pointer_align_down(new_body); - void* end_page = pointer_align_up( - pointer_offset(new_body, REQUIRED_SIZE)); - // Only commit readonly memory for this range, if the platform supports - // lazy commit. Otherwise, this would be a lot of memory to have - // mapped. - PAL::notify_using_readonly( - start_page, pointer_diff(start_page, end_page)); - } -#else - auto new_body = static_cast(new_body_untyped); -#endif - // Ensure bottom page is committed - // ASSUME: new memory is zeroed. - PAL::template notify_using( - pointer_align_down(new_body), OS_PAGE_SIZE); - - // Set up zero page - new_body[0] = body[0]; - - body = new_body; - body_opt = new_body; - } - - template - std::enable_if_t> get_bounds() - { - static_assert( - has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); - - return {base, size}; - } - - /** - * Get the number of entries. - */ - [[nodiscard]] constexpr size_t num_entries() const - { - if constexpr (has_bounds) - { - return size >> GRANULARITY_BITS; - } - else - { - return bits::one_at_bit(PAL::address_bits - GRANULARITY_BITS); - } - } - - /** - * If the location has not been used before, then - * `potentially_out_of_range` should be set to true. - * This will ensure there is a location for the - * read/write. - */ - template - T& get_mut(address_t p) - { - if constexpr (potentially_out_of_range) - { - if (SNMALLOC_UNLIKELY(body_opt == nullptr)) - return const_cast(default_value); - } - - if constexpr (has_bounds) - { - if (p - base > size) - { - if constexpr (potentially_out_of_range) - { - return const_cast(default_value); - } - else - { - // Out of range null should - // still return the default value. - if (p == 0) - return const_cast(default_value); - PAL::error("Internal error: Pagemap read access out of range."); - } - } - p = p - base; - } - - // If this is potentially_out_of_range, then the pages will not have - // been mapped. With Lazy commit they will at least be mapped read-only - // Note that: this means external pointer on Windows will be slow. 
- if constexpr (potentially_out_of_range && !pal_supports) - { - register_range(p, 1); - } - - if constexpr (potentially_out_of_range) - return body_opt[p >> SHIFT]; - else - return body[p >> SHIFT]; - } - - /** - * If the location has not been used before, then - * `potentially_out_of_range` should be set to true. - * This will ensure there is a location for the - * read/write. - */ - template - const T& get(address_t p) - { - return get_mut(p); - } - - /** - * Check if the pagemap has been initialised. - */ - [[nodiscard]] bool is_initialised() const - { - return body_opt != nullptr; - } - - /** - * Return the starting address corresponding to a given entry within the - * Pagemap. Also checks that the reference actually points to a valid entry. - */ - [[nodiscard]] address_t get_address(const T& t) const - { - address_t entry_offset = address_cast(&t) - address_cast(body); - address_t entry_index = entry_offset / sizeof(T); - SNMALLOC_ASSERT( - entry_offset % sizeof(T) == 0 && entry_index < num_entries()); - return base + (entry_index << GRANULARITY_BITS); - } - - void set(address_t p, const T& t) - { -#ifdef SNMALLOC_TRACING - message<1024>("Pagemap.Set {}", p); -#endif - if constexpr (has_bounds) - { - if (p - base > size) - { - PAL::error("Internal error: Pagemap write access out of range."); - } - p = p - base; - } - - body[p >> SHIFT] = t; - } - }; - /** * This is a generic implementation of the backend's interface to the page - * map. It takes a concrete page map implementation (probably FlatPageMap + * map. It takes a concrete page map implementation (probably FlatPagemap * above) and entry type. It is friends with the backend passed in as a * template parameter so that the backend can initialise the concrete page map * and use set_metaentry which no one else should use. */ template< - typename Backend, typename PAL, typename ConcreteMap, - typename PageMapEntry, + typename PagemapEntry, bool fixed_range> class BasicPagemap { @@ -345,10 +26,26 @@ namespace snmalloc /** * Export the type stored in the pagemap. */ - using Entry = PageMapEntry; + using Entry = PagemapEntry; + + static_assert( + std::is_same_v, + "BasicPagemap's PagemapEntry and ConcreteMap disagree!"); - private: - friend Backend; + static_assert( + std::is_base_of_v, + "BasicPagemap's PagemapEntry type is not a MetaEntryBase"); + + /** + * Prevent snmalloc's backend ranges from consolidating across adjacent OS + * allocations on platforms (e.g., Windows or StrictProvenance) where + * that's required. + */ +#if defined(_WIN32) || defined(__CHERI_PURE_CAPABILITY__) + static constexpr bool CONSOLIDATE_PAL_ALLOCS = false; +#else + static constexpr bool CONSOLIDATE_PAL_ALLOCS = true; +#endif /** * Instance of the concrete pagemap, accessible to the backend so that @@ -369,7 +66,6 @@ namespace snmalloc } } - public: /** * Get the metadata associated with a chunk. * @@ -397,10 +93,19 @@ namespace snmalloc /** * Register a range in the pagemap as in-use, requiring it to allow writing * to the underlying memory. + * + * Mark the MetaEntry at the bottom of the range as a boundary, preventing + * consolidation with a lower range, unless CONSOLIDATE_PAL_ALLOCS. */ - static void register_range(address_t p, size_t sz) + static void register_range(capptr::Arena p, size_t sz) { - concretePagemap.register_range(p, sz); + concretePagemap.register_range(address_cast(p), sz); + if constexpr (!CONSOLIDATE_PAL_ALLOCS) + { + // Mark start of allocation in pagemap. 
+ auto& entry = get_metaentry_mut(address_cast(p)); + entry.set_boundary(); + } } /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemapregisterrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemapregisterrange.h index de60dd8dae4b..1a96cb02b793 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemapregisterrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/pagemapregisterrange.h @@ -1,39 +1,40 @@ #pragma once +#include "../mem/metadata.h" #include "../pal/pal.h" +#include "empty_range.h" +#include "range_helpers.h" namespace snmalloc { - template< - SNMALLOC_CONCEPT(ConceptBackendMetaRange) Pagemap, - typename ParentRange, - bool CanConsolidate = true> - class PagemapRegisterRange + template + struct PagemapRegisterRange { - ParentRange state{}; - - public: - constexpr PagemapRegisterRange() = default; + template> + class Type : public ContainsParent + { + using ContainsParent::parent; - static constexpr bool Aligned = ParentRange::Aligned; + public: + constexpr Type() = default; - static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + static constexpr bool Aligned = ParentRange::Aligned; - capptr::Chunk alloc_range(size_t size) - { - auto base = state.alloc_range(size); + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; - if (base != nullptr) - Pagemap::register_range(address_cast(base), size); + using ChunkBounds = typename ParentRange::ChunkBounds; - if (!CanConsolidate) + CapPtr alloc_range(size_t size) { - // Mark start of allocation in pagemap. - auto& entry = Pagemap::get_metaentry_mut(address_cast(base)); - entry.set_boundary(); - } + auto base = parent.alloc_range(size); + + if (base != nullptr) + { + Pagemap::register_range(base, size); + } - return base; - } + return base; + } + }; }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/palrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/palrange.h index 0962e00bfc3a..ade65294a82c 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/palrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/palrange.h @@ -3,7 +3,7 @@ namespace snmalloc { - template + template class PalRange { public: @@ -14,9 +14,11 @@ namespace snmalloc // need to be changed. 
static constexpr bool ConcurrencySafe = true; + using ChunkBounds = capptr::bounds::Arena; + constexpr PalRange() = default; - capptr::Chunk alloc_range(size_t size) + capptr::Arena alloc_range(size_t size) { if (bits::next_pow2_bits(size) >= bits::BITS - 1) { @@ -26,8 +28,8 @@ namespace snmalloc if constexpr (pal_supports) { SNMALLOC_ASSERT(size >= PAL::minimum_alloc_size); - auto result = - capptr::Chunk(PAL::template reserve_aligned(size)); + auto result = capptr::Arena::unsafe_from( + PAL::template reserve_aligned(size)); #ifdef SNMALLOC_TRACING message<1024>("Pal range alloc: {} ({})", result.unsafe_ptr(), size); @@ -36,7 +38,7 @@ namespace snmalloc } else { - auto result = capptr::Chunk(PAL::reserve(size)); + auto result = capptr::Arena::unsafe_from(PAL::reserve(size)); #ifdef SNMALLOC_TRACING message<1024>("Pal range alloc: {} ({})", result.unsafe_ptr(), size); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h index a9dc43c8619b..076b9fd74072 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/range_helpers.h @@ -4,8 +4,8 @@ namespace snmalloc { - template - void range_to_pow_2_blocks(capptr::Chunk base, size_t length, F f) + template + void range_to_pow_2_blocks(CapPtr base, size_t length, F f) { auto end = pointer_offset(base, length); base = pointer_align_up(base, bits::one_at_bit(MIN_BITS)); @@ -16,7 +16,7 @@ namespace snmalloc // Find the minimum set of maximally aligned blocks in this range. // Each block's alignment and size are equal. - while (length >= sizeof(void*)) + while (length >= bits::one_at_bit(MIN_BITS)) { size_t base_align_bits = bits::ctz(address_cast(base)); size_t length_align_bits = (bits::BITS - 1) - bits::clz(length); @@ -34,4 +34,131 @@ namespace snmalloc length -= align; } } + + /** + * Forward definition to allow multiple template specialisations. + * + * This struct is used to recursively compose ranges. + */ + template + struct PipeImpl; + + /** + * Base case of one range that needs nothing. + */ + template + struct PipeImpl + { + using result = Only; + }; + + /** + * Recursive case of applying a base range as an argument to the + * next, and then using that as the new base range. + */ + template + struct PipeImpl + { + public: + using result = + typename PipeImpl, Rest...>::result; + }; + + /** + * Nice type so the caller doesn't need to call result explicitly. + */ + template + using Pipe = typename PipeImpl::result; + + /** + * Helper class for allowing a range to be navigated to find an + * ancestor of a specific type. The parent is an instance field. + */ + template + class ContainsParent + { + protected: + Parent parent{}; + + public: + /** + * Returns the outermost Ancestor with the correct type. + * + * Fails to compile if no such ancestor exists. + */ + template + Anc* ancestor() + { + if constexpr (std::is_same_v) + { + return &parent; + } + else + { + return parent.template ancestor(); + } + } + }; + + /** + * Helper class for allowing a range to be navigated to find an + * ancestor of a specific type. The parent is a static field. + */ + template + class StaticParent + { + protected: + SNMALLOC_REQUIRE_CONSTINIT inline static Parent parent{}; + + public: + /** + * Returns the outermost Ancestor with the correct type. + * + * Fails to compile if no such ancestor exists. 
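// Sketch for illustration only (not snmalloc code): Pipe<Base, R1, R2> above
// expands to R2::Type<R1::Type<Base>>, i.e. each stage's nested Type template
// is instantiated with the stage below it, and ContainsParent::ancestor<T>()
// walks back down that chain at compile time. A stand-alone miniature of the
// same machinery (Leaf, Doubler and Counter are hypothetical stages):
#include <type_traits>

namespace pipe_sketch
{
  template<typename Parent>
  struct ContainsParent
  {
    Parent parent{};

    template<typename Anc>
    Anc* ancestor()
    {
      if constexpr (std::is_same_v<Anc, Parent>)
        return &parent;
      else
        return parent.template ancestor<Anc>();
    }
  };

  struct Leaf
  {
    int value()
    {
      return 1;
    }
  };

  struct Doubler
  {
    template<typename Parent>
    struct Type : ContainsParent<Parent>
    {
      int value()
      {
        return 2 * this->parent.value();
      }
    };
  };

  struct Counter
  {
    template<typename Parent>
    struct Type : ContainsParent<Parent>
    {
      int calls = 0;

      int value()
      {
        calls++;
        return this->parent.value();
      }
    };
  };

  // Hand-expanded form of Pipe<Leaf, Doubler, Counter>:
  using Stack = Counter::Type<Doubler::Type<Leaf>>;

  inline int demo()
  {
    Stack s;
    int v = s.value();               // 2, via Counter -> Doubler -> Leaf
    Leaf* leaf = s.ancestor<Leaf>(); // reach over both stages
    return v + leaf->value();        // 3
  }
} // namespace pipe_sketch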
+ */ + template + Anc* ancestor() + { + if constexpr (std::is_same_v) + { + return &parent; + } + else + { + return parent.template ancestor(); + } + } + }; + + /** + * Helper class for allowing a range to be navigated to find an + * ancestor of a specific type. The parent is a pointer to a range; + * this allows the parent to be shared. + */ + template + class RefParent + { + protected: + Parent* parent{}; + + public: + /** + * Returns the outermost Ancestor with the correct type. + * + * Fails to compile if no such ancestor exists. + */ + template + Anc* ancestor() + { + if constexpr (std::is_same_v) + { + return parent; + } + else + { + return parent->template ancestor(); + } + } + }; + } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h index b212323b5d08..83796e1ecbe4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/smallbuddyrange.h @@ -1,6 +1,7 @@ #pragma once #include "../pal/pal.h" +#include "empty_range.h" #include "range_helpers.h" namespace snmalloc @@ -9,20 +10,22 @@ namespace snmalloc * struct for representing the redblack nodes * directly inside the meta data. */ + template struct FreeChunk { - capptr::Chunk left; - capptr::Chunk right; + CapPtr left; + CapPtr right; }; /** * Class for using the allocations own space to store in the RBTree. */ + template class BuddyInplaceRep { public: - using Handle = capptr::Chunk*; - using Contents = capptr::Chunk; + using Handle = CapPtr, bounds>*; + using Contents = CapPtr, bounds>; static constexpr Contents null = nullptr; static constexpr Contents root = nullptr; @@ -32,17 +35,17 @@ namespace snmalloc { SNMALLOC_ASSERT((address_cast(r) & MASK) == 0); if (r == nullptr) - *ptr = capptr::Chunk( - reinterpret_cast((*ptr).unsafe_uintptr() & MASK)); + *ptr = CapPtr, bounds>::unsafe_from( + reinterpret_cast*>((*ptr).unsafe_uintptr() & MASK)); else // Preserve lower bit. *ptr = pointer_offset(r, (address_cast(*ptr) & MASK)) - .template as_static(); + .template as_static>(); } static Contents get(Handle ptr) { - return pointer_align_down<2, FreeChunk>((*ptr).as_void()); + return pointer_align_down<2, FreeChunk>((*ptr).as_void()); } static Handle ref(bool direction, Contents r) @@ -65,14 +68,16 @@ namespace snmalloc if (new_is_red != is_red(k)) { auto r = ref(false, k); - auto old_addr = pointer_align_down<2, FreeChunk>(r->as_void()); + auto old_addr = pointer_align_down<2, FreeChunk>(r->as_void()); if (new_is_red) { if (old_addr == nullptr) - *r = capptr::Chunk(reinterpret_cast(MASK)); + *r = CapPtr, bounds>::unsafe_from( + reinterpret_cast*>(MASK)); else - *r = pointer_offset(old_addr, MASK).template as_static(); + *r = pointer_offset(old_addr, MASK) + .template as_static>(); } else { @@ -84,21 +89,22 @@ namespace snmalloc static Contents offset(Contents k, size_t size) { - return pointer_offset(k, size).template as_static(); + return pointer_offset(k, size).template as_static>(); } static Contents buddy(Contents k, size_t size) { // This is just doing xor size, but with what API // exists on capptr. 
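// Sketch for illustration only (not snmalloc code): "xor size" works because
// two buddies of size S differ only in bit S within their naturally aligned
// 2*S-sized parent block. Stripped of the capptr machinery, the arithmetic
// in the buddy() function below is just:
#include <cstdint>

namespace buddy_xor_sketch
{
  constexpr std::uintptr_t buddy_of(std::uintptr_t addr, std::uintptr_t size)
  {
    std::uintptr_t base = addr & ~(2 * size - 1); // align down to 2*size
    std::uintptr_t offset = (addr & size) ^ size; // flip the size bit
    return base + offset;                         // equivalent to addr ^ size
  }

  // Worked example with size == 0x1000 (4 KiB):
  static_assert(buddy_of(0x4000, 0x1000) == 0x5000);
  static_assert(buddy_of(0x5000, 0x1000) == 0x4000);
  static_assert(buddy_of(0x7000, 0x1000) == 0x6000);
} // namespace buddy_xor_sketch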
- auto base = pointer_align_down(k.as_void(), size * 2); + auto base = pointer_align_down>(k.as_void(), size * 2); auto offset = (address_cast(k) & size) ^ size; - return pointer_offset(base, offset).template as_static(); + return pointer_offset(base, offset) + .template as_static>(); } static Contents align_down(Contents k, size_t size) { - return pointer_align_down(k.as_void(), size); + return pointer_align_down>(k.as_void(), size); } static bool compare(Contents k1, Contents k2) @@ -142,94 +148,105 @@ namespace snmalloc } }; - template - class SmallBuddyRange + struct SmallBuddyRange { - ParentRange parent{}; + template> + class Type : public ContainsParent + { + public: + using ChunkBounds = typename ParentRange::ChunkBounds; - static constexpr size_t MIN_BITS = - bits::next_pow2_bits_const(sizeof(FreeChunk)); + private: + using ContainsParent::parent; - Buddy buddy_small; + static constexpr size_t MIN_BITS = + bits::next_pow2_bits_const(sizeof(FreeChunk)); - /** - * Add a range of memory to the address space. - * Divides blocks into power of two sizes with natural alignment - */ - void add_range(capptr::Chunk base, size_t length) - { - range_to_pow_2_blocks( - base, length, [this](capptr::Chunk base, size_t align, bool) { - capptr::Chunk overflow = - buddy_small.add_block(base.as_reinterpret(), align) - .template as_reinterpret(); - if (overflow != nullptr) - parent.dealloc_range(overflow, bits::one_at_bit(MIN_CHUNK_BITS)); - }); - } + Buddy, MIN_BITS, MIN_CHUNK_BITS> buddy_small; - capptr::Chunk refill(size_t size) - { - auto refill = parent.alloc_range(MIN_CHUNK_SIZE); + /** + * Add a range of memory to the address space. + * Divides blocks into power of two sizes with natural alignment + */ + void add_range(CapPtr base, size_t length) + { + range_to_pow_2_blocks( + base, + length, + [this](CapPtr base, size_t align, bool) { + if (align < MIN_CHUNK_SIZE) + { + CapPtr overflow = + buddy_small + .add_block( + base.template as_reinterpret>(), + align) + .template as_reinterpret(); + if (overflow != nullptr) + parent.dealloc_range( + overflow, bits::one_at_bit(MIN_CHUNK_BITS)); + } + else + { + parent.dealloc_range(base, align); + } + }); + } - if (refill != nullptr) - add_range(pointer_offset(refill, size), MIN_CHUNK_SIZE - size); + CapPtr refill(size_t size) + { + auto refill = parent.alloc_range(MIN_CHUNK_SIZE); - return refill; - } + if (refill != nullptr) + add_range(pointer_offset(refill, size), MIN_CHUNK_SIZE - size); - public: - static constexpr bool Aligned = true; - static_assert(ParentRange::Aligned, "ParentRange must be aligned"); + return refill; + } - static constexpr bool ConcurrencySafe = false; + public: + static constexpr bool Aligned = true; + static_assert(ParentRange::Aligned, "ParentRange must be aligned"); - constexpr SmallBuddyRange() = default; + static constexpr bool ConcurrencySafe = false; - capptr::Chunk alloc_range(size_t size) - { - if (size >= MIN_CHUNK_SIZE) - { - return parent.alloc_range(size); - } + constexpr Type() = default; - auto result = buddy_small.remove_block(size); - if (result != nullptr) + CapPtr alloc_range(size_t size) { - result->left = nullptr; - result->right = nullptr; - return result.template as_reinterpret(); - } - return refill(size); - } + if (size >= MIN_CHUNK_SIZE) + return parent.alloc_range(size); - capptr::Chunk alloc_range_with_leftover(size_t size) - { - SNMALLOC_ASSERT(size <= MIN_CHUNK_SIZE); + auto result = buddy_small.remove_block(size); + if (result != nullptr) + { + result->left = nullptr; + result->right = nullptr; + 
return result.template as_reinterpret(); + } + return refill(size); + } - auto rsize = bits::next_pow2(size); + CapPtr alloc_range_with_leftover(size_t size) + { + auto rsize = bits::next_pow2(size); - auto result = alloc_range(rsize); + auto result = alloc_range(rsize); - if (result == nullptr) - return nullptr; + if (result == nullptr) + return nullptr; - auto remnant = pointer_offset(result, size); + auto remnant = pointer_offset(result, size); - add_range(remnant, rsize - size); + add_range(remnant, rsize - size); - return result.template as_reinterpret(); - } + return result.template as_reinterpret(); + } - void dealloc_range(capptr::Chunk base, size_t size) - { - if (size >= MIN_CHUNK_SIZE) + void dealloc_range(CapPtr base, size_t size) { - parent.dealloc_range(base, size); - return; + SNMALLOC_ASSERT(bits::is_pow2(size)); + add_range(base, size); } - - add_range(base, size); - } + }; }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/staticconditionalrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/staticconditionalrange.h new file mode 100644 index 000000000000..46637135d794 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/staticconditionalrange.h @@ -0,0 +1,78 @@ +#pragma once +#include "../pal/pal.h" +#include "empty_range.h" +#include "range_helpers.h" + +namespace snmalloc +{ + template + struct StaticConditionalRange + { + // This is a range that can bypass the OptionalRange if it is disabled. + // Disabling is global, and not local. + // This is used to allow disabling thread local buddy allocators when the + // initial fixed size heap is small. + // + // The range builds a more complex parent + // Pipe + // and uses the ancestor functions to bypass the OptionalRange if the flag + // has been set. + template + class Type : public ContainsParent> + { + // This contains connects the optional range to the parent range. + using ActualParentRange = Pipe; + + using ContainsParent::parent; + + // Global flag specifying if the optional range should be disabled. + static inline bool disable_range_{false}; + + public: + // Both parent and grandparent must be aligned for this range to be + // aligned. + static constexpr bool Aligned = + ActualParentRange::Aligned && ParentRange::Aligned; + + // Both parent and grandparent must be aligned for this range to be + // concurrency safe. + static constexpr bool ConcurrencySafe = + ActualParentRange::ConcurrencySafe && ParentRange::ConcurrencySafe; + + using ChunkBounds = typename ActualParentRange::ChunkBounds; + + static_assert( + std::is_same_v, + "Grandparent and optional parent range chunk bounds must be equal"); + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + if (disable_range_) + { + // Use ancestor to bypass the optional range. + return this->template ancestor()->alloc_range(size); + } + + return parent.alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + if (disable_range_) + { + // Use ancestor to bypass the optional range. 
+ this->template ancestor()->dealloc_range(base, size); + return; + } + parent.dealloc_range(base, size); + } + + static void disable_range() + { + disable_range_ = true; + } + }; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/staticrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/staticrange.h new file mode 100644 index 000000000000..2996c0fbe7bc --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/staticrange.h @@ -0,0 +1,42 @@ +#pragma once + +#include "../ds/ds.h" +#include "empty_range.h" + +namespace snmalloc +{ + /** + * Makes the supplied ParentRange into a global variable. + */ + struct StaticRange + { + template> + class Type : public StaticParent + { + using StaticParent::parent; + + public: + static constexpr bool Aligned = ParentRange::Aligned; + + static_assert( + ParentRange::ConcurrencySafe, + "StaticRange requires a concurrency safe parent."); + + static constexpr bool ConcurrencySafe = true; + + using ChunkBounds = typename ParentRange::ChunkBounds; + + constexpr Type() = default; + + CapPtr alloc_range(size_t size) + { + return parent.alloc_range(size); + } + + void dealloc_range(CapPtr base, size_t size) + { + parent.dealloc_range(base, size); + } + }; + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/statsrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/statsrange.h index 98a06aec38ed..8548be9cb377 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/statsrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/statsrange.h @@ -1,5 +1,8 @@ #pragma once +#include "empty_range.h" +#include "range_helpers.h" + #include namespace snmalloc @@ -7,52 +10,57 @@ namespace snmalloc /** * Used to measure memory usage. 
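// Sketch for illustration only (not snmalloc code): the interesting part of
// the StatsRange below is the lock-free peak tracker: fetch_add the current
// usage, then raise the recorded peak with a compare_exchange_weak loop that
// gives up as soon as some other thread has already published a higher
// value. Stand-alone form:
#include <atomic>
#include <cstddef>

namespace stats_sketch
{
  inline std::atomic<std::size_t> current_usage{0};
  inline std::atomic<std::size_t> peak_usage{0};

  inline void on_alloc(std::size_t size)
  {
    std::size_t prev = current_usage.fetch_add(size);
    std::size_t target = prev + size;
    std::size_t curr = peak_usage.load();
    while (curr < target)
    {
      // On failure, curr is reloaded with the latest peak, so the loop exits
      // once the recorded peak is already >= our target.
      if (peak_usage.compare_exchange_weak(curr, target))
        break;
    }
  }

  inline void on_dealloc(std::size_t size)
  {
    current_usage -= size;
  }
} // namespace stats_sketch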
*/ - template - class StatsRange + struct StatsRange { - ParentRange parent{}; + template> + class Type : public ContainsParent + { + using ContainsParent::parent; - static inline std::atomic current_usage{}; - static inline std::atomic peak_usage{}; + static inline std::atomic current_usage{}; + static inline std::atomic peak_usage{}; - public: - static constexpr bool Aligned = ParentRange::Aligned; + public: + static constexpr bool Aligned = ParentRange::Aligned; - static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; - constexpr StatsRange() = default; + using ChunkBounds = typename ParentRange::ChunkBounds; - capptr::Chunk alloc_range(size_t size) - { - auto result = parent.alloc_range(size); - if (result != nullptr) + constexpr Type() = default; + + CapPtr alloc_range(size_t size) { - auto prev = current_usage.fetch_add(size); - auto curr = peak_usage.load(); - while (curr < prev + size) + auto result = parent.alloc_range(size); + if (result != nullptr) { - if (peak_usage.compare_exchange_weak(curr, prev + size)) - break; + auto prev = current_usage.fetch_add(size); + auto curr = peak_usage.load(); + while (curr < prev + size) + { + if (peak_usage.compare_exchange_weak(curr, prev + size)) + break; + } } + return result; } - return result; - } - void dealloc_range(capptr::Chunk base, size_t size) - { - current_usage -= size; - parent.dealloc_range(base, size); - } + void dealloc_range(CapPtr base, size_t size) + { + current_usage -= size; + parent.dealloc_range(base, size); + } - size_t get_current_usage() - { - return current_usage.load(); - } + size_t get_current_usage() + { + return current_usage.load(); + } - size_t get_peak_usage() - { - return peak_usage.load(); - } + size_t get_peak_usage() + { + return peak_usage.load(); + } + }; }; template diff --git a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/subrange.h b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/subrange.h index 3c6617d7cd30..8d886a2b8d5a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/subrange.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/backend_helpers/subrange.h @@ -1,5 +1,6 @@ #pragma once #include "../mem/mem.h" +#include "empty_range.h" namespace snmalloc { @@ -8,37 +9,43 @@ namespace snmalloc * 2^RATIO_BITS. Will not return a the block at the start or * the end of the large allocation. */ - template - class SubRange + template + struct SubRange { - ParentRange parent{}; + template> + class Type : public ContainsParent + { + using ContainsParent::parent; - public: - constexpr SubRange() = default; + public: + constexpr Type() = default; - static constexpr bool Aligned = ParentRange::Aligned; + static constexpr bool Aligned = ParentRange::Aligned; - static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; + static constexpr bool ConcurrencySafe = ParentRange::ConcurrencySafe; - capptr::Chunk alloc_range(size_t sub_size) - { - SNMALLOC_ASSERT(bits::is_pow2(sub_size)); - - auto full_size = sub_size << RATIO_BITS; - auto overblock = parent.alloc_range(full_size); - if (overblock == nullptr) - return nullptr; - - size_t offset_mask = full_size - sub_size; - // Don't use first or last block in the larger reservation - // Loop required to get uniform distribution. 
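// Sketch for illustration only (not snmalloc code): the offset loop that
// follows draws a sub_size-aligned offset uniformly from the 2^RATIO_BITS
// slots of the over-sized parent allocation, then rejects the first and last
// slot so the returned block never abuts either end of the reservation.
// Stand-alone form (rng() stands in for get_entropy64() and is hypothetical):
#include <cstddef>
#include <cstdint>

namespace subrange_sketch
{
  template<typename Rng>
  std::size_t
  pick_offset(std::size_t sub_size, std::size_t ratio_bits, Rng&& rng)
  {
    std::size_t full_size = sub_size << ratio_bits;
    std::size_t offset_mask = full_size - sub_size; // sub_size-aligned offsets
    std::size_t offset;
    do
    {
      // Rejection sampling keeps the surviving offsets uniformly distributed.
      offset = static_cast<std::size_t>(rng()) & offset_mask;
    } while (offset == 0 || offset == offset_mask);
    return offset;
  }
} // namespace subrange_sketch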
- size_t offset; - do - { - offset = get_entropy64() & offset_mask; - } while ((offset == 0) || (offset == offset_mask)); + using ChunkBounds = typename ParentRange::ChunkBounds; - return pointer_offset(overblock, offset); - } + CapPtr alloc_range(size_t sub_size) + { + SNMALLOC_ASSERT(bits::is_pow2(sub_size)); + + auto full_size = sub_size << RATIO_BITS; + auto overblock = parent.alloc_range(full_size); + if (overblock == nullptr) + return nullptr; + + size_t offset_mask = full_size - sub_size; + // Don't use first or last block in the larger reservation + // Loop required to get uniform distribution. + size_t offset; + do + { + offset = get_entropy64() & offset_mask; + } while ((offset == 0) || (offset == offset_mask)); + + return pointer_offset(overblock, offset); + } + }; }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h index 51c447035d63..f14cc9ef685a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/aba.h @@ -141,7 +141,7 @@ namespace snmalloc class ABA { std::atomic ptr = nullptr; - std::atomic_flag lock = ATOMIC_FLAG_INIT; + std::atomic lock{false}; public: // This method is used in Verona @@ -154,7 +154,7 @@ namespace snmalloc Cmp read() { - while (lock.test_and_set(std::memory_order_acquire)) + while (lock.exchange(true, std::memory_order_acquire)) Aal::pause(); # if !defined(NDEBUG) && !defined(SNMALLOC_DISABLE_ABA_VERIFY) @@ -184,7 +184,7 @@ namespace snmalloc ~Cmp() { - parent->lock.clear(std::memory_order_release); + parent->lock.store(false, std::memory_order_release); # if !defined(NDEBUG) && !defined(SNMALLOC_DISABLE_ABA_VERIFY) operation_in_flight = false; # endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h index b3e789e58d6d..858940f05e50 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/allocconfig.h @@ -26,22 +26,52 @@ namespace snmalloc static constexpr size_t MIN_ALLOC_BITS = bits::ctz_const(MIN_ALLOC_SIZE); // Minimum slab size. - static constexpr size_t MIN_CHUNK_BITS = bits::max( - static_cast(14), bits::next_pow2_bits_const(OS_PAGE_SIZE)); +#if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) + /* + * QEMU user-mode, up through and including v7.2.0-rc4, the latest tag at the + * time of this writing, does not use a tree of any sort to store its opinion + * of the address space, allocating an amount of memory linear in the size of + * any created map, not the number of pages actually used. This is + * exacerbated in and after qemu v6 (or, more specifically, d9c58585), which + * grew the proportionality constant. + * + * In any case, for our CI jobs, then, use a larger minimum chunk size (that + * is, pagemap granularity) than by default to reduce the size of the + * pagemap. We can't raise this *too* much, lest we hit constexpr step + * limits in the sizeclasstable magic! 17 bits seems to be the sweet spot + * and means that any of our tests can run in a little under 2 GiB of RSS + * even on QEMU versions after v6. 
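// Sketch for illustration only (not snmalloc code): the reason raising
// MIN_CHUNK_BITS helps under QEMU user-mode is that the flat pagemap
// reserves one entry per 2^GRANULARITY_BITS bytes of address space, so the
// reservation shrinks by 8x when the granularity goes from 14 to 17 bits.
// Assuming a 48-bit address space and a 16-byte pagemap entry (both figures
// are illustrative, not taken from the patch):
#include <cstddef>

namespace pagemap_size_sketch
{
  constexpr std::size_t pagemap_bytes(
    std::size_t address_bits,
    std::size_t granularity_bits,
    std::size_t entry_size)
  {
    return (std::size_t{1} << (address_bits - granularity_bits)) * entry_size;
  }

  // 14-bit granularity: 2^34 entries * 16 B = 256 GiB of reservation.
  static_assert(pagemap_bytes(48, 14, 16) == (std::size_t{256} << 30));
  // 17-bit granularity: 2^31 entries * 16 B = 32 GiB of reservation.
  static_assert(pagemap_bytes(48, 17, 16) == (std::size_t{32} << 30));
} // namespace pagemap_size_sketch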
+ */ + static constexpr size_t MIN_CHUNK_BITS = static_cast(17); +#else + static constexpr size_t MIN_CHUNK_BITS = static_cast(14); +#endif static constexpr size_t MIN_CHUNK_SIZE = bits::one_at_bit(MIN_CHUNK_BITS); // Minimum number of objects on a slab -#ifdef SNMALLOC_CHECK_CLIENT - static constexpr size_t MIN_OBJECT_COUNT = 13; -#else - static constexpr size_t MIN_OBJECT_COUNT = 4; -#endif + static constexpr size_t MIN_OBJECT_COUNT = + mitigations(random_larger_thresholds) ? 13 : 4; // Maximum size of an object that uses sizeclasses. +#if defined(SNMALLOC_QEMU_WORKAROUND) && defined(SNMALLOC_VA_BITS_64) + /* + * As a consequence of our significantly larger minimum chunk size, we need + * to raise the threshold for what constitutes a large object (which must + * be a multiple of the minimum chunk size). Extend the space of small + * objects up enough to match yet preserve the notion that there exist small + * objects larger than MIN_CHUNK_SIZE. + */ + static constexpr size_t MAX_SMALL_SIZECLASS_BITS = 19; +#else static constexpr size_t MAX_SMALL_SIZECLASS_BITS = 16; +#endif static constexpr size_t MAX_SMALL_SIZECLASS_SIZE = bits::one_at_bit(MAX_SMALL_SIZECLASS_BITS); + static_assert( + MAX_SMALL_SIZECLASS_SIZE >= MIN_CHUNK_SIZE, + "Large sizes need to be representable by as a multiple of MIN_CHUNK_SIZE"); + // Number of slots for remote deallocation. static constexpr size_t REMOTE_SLOT_BITS = 8; static constexpr size_t REMOTE_SLOTS = 1 << REMOTE_SLOT_BITS; @@ -62,4 +92,10 @@ namespace snmalloc 1 << MIN_CHUNK_BITS #endif ; + + // Used to configure when the backend should use thread local buddies. + // This only basically is used to disable some buddy allocators on small + // fixed heap scenarios like OpenEnclave. + static constexpr size_t MIN_HEAP_SIZE_FOR_THREAD_LOCAL_BUDDY = + bits::one_at_bit(27); } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h index 432277dcb608..4cfa22b9b9d3 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/ds.h @@ -6,6 +6,8 @@ #include "../pal/pal.h" #include "aba.h" #include "allocconfig.h" +#include "entropy.h" #include "flaglock.h" #include "mpmcstack.h" +#include "pagemap.h" #include "singleton.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/entropy.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/entropy.h new file mode 100644 index 000000000000..431495e2e645 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/entropy.h @@ -0,0 +1,26 @@ +#pragma once + +#ifndef SNMALLOC_PLATFORM_HAS_GETENTROPY +# include +#endif + +namespace snmalloc +{ + template + std::enable_if_t, uint64_t> get_entropy64() + { + return PAL::get_entropy64(); + } + + template + std::enable_if_t, uint64_t> get_entropy64() + { +#ifdef SNMALLOC_PLATFORM_HAS_GETENTROPY + return DefaultPal::get_entropy64(); +#else + std::random_device rd; + uint64_t a = rd(); + return (a << 32) ^ rd(); +#endif + } +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h index 5fbbf0a0a757..4a539e636078 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/flaglock.h @@ -1,6 +1,7 @@ #pragma once #include "../aal/aal.h" +#include "../pal/pal.h" #include #include @@ -14,6 +15,8 @@ namespace snmalloc */ struct DebugFlagWord { + using ThreadIdentity = DefaultPal::ThreadIdentity; + /** * @brief flag * The 
underlying atomic field. @@ -32,7 +35,7 @@ namespace snmalloc */ void set_owner() { - SNMALLOC_ASSERT(nullptr == owner); + SNMALLOC_ASSERT(ThreadIdentity() == owner); owner = get_thread_identity(); } @@ -43,7 +46,7 @@ namespace snmalloc void clear_owner() { SNMALLOC_ASSERT(get_thread_identity() == owner); - owner = nullptr; + owner = ThreadIdentity(); } /** @@ -56,24 +59,19 @@ namespace snmalloc } private: - using ThreadIdentity = int const*; - /** * @brief owner - * We use a pointer to TLS field as the thread identity. - * std::thread::id can be another solution but it does not - * support `constexpr` initialisation on some platforms. + * We use the Pal to provide the ThreadIdentity. */ - std::atomic owner = nullptr; + std::atomic owner = ThreadIdentity(); /** * @brief get_thread_identity * @return The identity of current thread. */ - inline ThreadIdentity get_thread_identity() + static ThreadIdentity get_thread_identity() { - static thread_local int SNMALLOC_THREAD_IDENTITY = 0; - return &SNMALLOC_THREAD_IDENTITY; + return DefaultPal::get_tid(); } }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h b/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h new file mode 100644 index 000000000000..267fe9a0b30c --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds/pagemap.h @@ -0,0 +1,370 @@ +#pragma once + +namespace snmalloc +{ + /** + * Simple pagemap that for each GRANULARITY_BITS of the address range + * stores a T. + */ + template + class FlatPagemap + { + public: + static constexpr size_t SHIFT = GRANULARITY_BITS; + static constexpr size_t GRANULARITY = bits::one_at_bit(GRANULARITY_BITS); + + private: + /** + * Before init is called will contain a single entry + * that is the default value. This is needed so that + * various calls do not have to check for nullptr. + * free(nullptr) + * and + * malloc_usable_size(nullptr) + * do not require an allocation to have ocurred before + * they are called. + */ + inline static const T default_value{}; + + /** + * The representation of the page map. + * + * Initially a single element to ensure nullptr operations + * work. + */ + T* body{const_cast(&default_value)}; + + /** + * The representation of the pagemap, but nullptr if it has not been + * initialised. Used to combine init checking and lookup. + */ + T* body_opt{nullptr}; + + /** + * If `has_bounds` is set, then these should contain the + * bounds of the heap that is being managed by this pagemap. + */ + address_t base{0}; + size_t size{0}; + + public: + using EntryType = T; + + /** + * Ensure this range of pagemap is accessible + */ + void register_range(address_t p, size_t length) + { + SNMALLOC_ASSERT(is_initialised()); + + // Calculate range in pagemap that is associated to this space. + auto first = &body[p >> SHIFT]; + auto last = &body[(p + length + bits::one_at_bit(SHIFT) - 1) >> SHIFT]; + + // Commit OS pages associated to the range. + auto page_start = pointer_align_down(first); + auto page_end = pointer_align_up(last); + size_t using_size = pointer_diff(page_start, page_end); + PAL::template notify_using(page_start, using_size); + } + + constexpr FlatPagemap() = default; + + /** + * For pagemaps that cover an entire fixed address space, return the size + * that they must be. This allows the caller to allocate the correct + * amount of memory to be passed to `init`. This is not available for + * fixed-range pagemaps, whose size depends on dynamic configuration. 
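// --------------------------------------------------------------------------
// Sketch (not part of this patch) of the owner-tracking idea behind
// DebugFlagWord in the flaglock.h hunk above: record an identity on acquire
// and assert it on release.  std::thread::id is used here only to keep the
// sketch self-contained; as the removed comment notes, snmalloc avoids it
// because it cannot be constexpr-initialised on all platforms, and now takes
// a ThreadIdentity from the Pal instead.
// --------------------------------------------------------------------------
#include <cassert>
#include <thread>

struct DebugOwner
{
  std::thread::id owner{}; // default-constructed id means "no owner"

  void set_owner()
  {
    assert(owner == std::thread::id());
    owner = std::this_thread::get_id();
  }

  void clear_owner()
  {
    assert(owner == std::this_thread::get_id());
    owner = std::thread::id();
  }
};
// --------------------------------------------------------------------------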
+ */ + template + static constexpr std::enable_if_t required_size() + { + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + constexpr size_t COVERED_BITS = PAL::address_bits - GRANULARITY_BITS; + constexpr size_t ENTRIES = bits::one_at_bit(COVERED_BITS); + return ENTRIES * sizeof(T); + } + + /** + * Initialise with pre-allocated memory. + * + * This is currently disabled for bounded pagemaps but may be reenabled if + * `required_size` is enabled for the has-bounds case. + */ + template + std::enable_if_t init(T* address) + { + SNMALLOC_ASSERT(!is_initialised()); + + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + body = address; + body_opt = address; + } + + /** + * Initialise the pagemap with bounds. + * + * Returns usable range after pagemap has been allocated + */ + template + std::enable_if_t> + init(void* b, size_t s) + { + SNMALLOC_ASSERT(!is_initialised()); + + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); +#ifdef SNMALLOC_TRACING + message<1024>("Pagemap.init {} ({})", b, s); +#endif + SNMALLOC_ASSERT(s != 0); + // TODO take account of pagemap size in the calculation of how big it + // needs to be. The following code creates a pagemap that covers the + // pagemap as well as the left over. This is not ideal, and we should + // really calculate the division with + // + // GRANULARITY + sizeof(T) + // + // There are awkward corner cases for the alignment of the start and + // the end that are hard to calculate. So this is not currently done. + + // Calculate range in pagemap that is associated to this space. + // Over calculate to cover any unaligned parts at either end. + base = bits::align_down(address_cast(b), GRANULARITY); + auto end = bits::align_up(address_cast(b) + s, GRANULARITY); + size = end - base; + + // Setup the pagemap. + body = static_cast(b); + body_opt = body; + + // Calculate size of pagemap. + auto pagemap_size = (size >> SHIFT) * sizeof(T); + + // Advance by size of pagemap. + // TODO CHERI capability bound here! + auto heap_base = pointer_offset(b, pagemap_size); + + // The following assert prevents the corner case where the pagemap + // occupies the entire address space, and this + // s - pagemap_size + // can underflow. + static_assert( + sizeof(T) < (1 << SHIFT), + "Pagemap entry too large relative to granularity"); + + if (pagemap_size > s) + { + // The pagemap is larger than the available space. + error("Pagemap is larger than the available space."); + } + + return {heap_base, s - pagemap_size}; + } + + /** + * Initialise the pagemap without bounds. + */ + template + std::enable_if_t init() + { + SNMALLOC_ASSERT(!is_initialised()); + + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + static constexpr size_t REQUIRED_SIZE = required_size(); + + // Allocate a power of two extra to allow the placement of the + // pagemap be difficult to guess if randomize_position set. + size_t additional_size = + randomize_position ? bits::next_pow2(REQUIRED_SIZE) * 4 : 0; + size_t request_size = REQUIRED_SIZE + additional_size; + + auto new_body_untyped = PAL::reserve(request_size); + + if (new_body_untyped == nullptr) + { + PAL::error("Failed to initialise snmalloc."); + } + + T* new_body; + + if constexpr (randomize_position) + { + // Begin pagemap at random offset within the additionally allocated + // space. 
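// --------------------------------------------------------------------------
// Illustrative check (not from the sources) of the masking used for the
// randomised pagemap placement in this init path: when the over-reservation
// and the entry size are both powers of two, entropy & (additional_size -
// sizeof(T)) is always entry-aligned and strictly inside the extra space.
// --------------------------------------------------------------------------
#include <cstdint>

constexpr bool offset_ok(uint64_t entropy, uint64_t additional, uint64_t entry)
{
  uint64_t offset = entropy & (additional - entry);
  return (offset % entry == 0) && (offset < additional);
}

static_assert(offset_ok(0xdeadbeefcafef00d, uint64_t(1) << 20, 8));
static_assert(offset_ok(~uint64_t(0), uint64_t(1) << 20, 16));
// --------------------------------------------------------------------------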
+ static_assert(bits::is_pow2(sizeof(T)), "Next line assumes this."); + size_t offset = get_entropy64() & (additional_size - sizeof(T)); + new_body = pointer_offset(new_body_untyped, offset); + + if constexpr (pal_supports) + { + void* start_page = pointer_align_down(new_body); + void* end_page = pointer_align_up( + pointer_offset(new_body, REQUIRED_SIZE)); + // Only commit readonly memory for this range, if the platform + // supports lazy commit. Otherwise, this would be a lot of memory to + // have mapped. + PAL::notify_using_readonly( + start_page, pointer_diff(start_page, end_page)); + } + } + else + { + if constexpr (pal_supports) + { + PAL::notify_using_readonly(new_body_untyped, REQUIRED_SIZE); + } + new_body = static_cast(new_body_untyped); + } + // Ensure bottom page is committed + // ASSUME: new memory is zeroed. + PAL::template notify_using( + pointer_align_down(new_body), OS_PAGE_SIZE); + + // Set up zero page + new_body[0] = body[0]; + + body = new_body; + body_opt = new_body; + } + + template + std::enable_if_t> get_bounds() + { + SNMALLOC_ASSERT(is_initialised()); + + static_assert( + has_bounds_ == has_bounds, "Don't set SFINAE template parameter!"); + + return {base, size}; + } + + /** + * Get the number of entries. + */ + [[nodiscard]] constexpr size_t num_entries() const + { + SNMALLOC_ASSERT(is_initialised()); + + if constexpr (has_bounds) + { + return size >> GRANULARITY_BITS; + } + else + { + return bits::one_at_bit(PAL::address_bits - GRANULARITY_BITS); + } + } + + /** + * + * Get a non-constant reference to the slot of this pagemap corresponding + * to a particular address. + * + * If the location has not been used before, then + * `potentially_out_of_range` should be set to true. This will ensure + * there is memory backing the returned reference. + */ + template + T& get_mut(address_t p) + { + if constexpr (potentially_out_of_range) + { + if (SNMALLOC_UNLIKELY(body_opt == nullptr)) + return const_cast(default_value); + } + + SNMALLOC_ASSERT(is_initialised() || p == 0); + + if constexpr (has_bounds) + { + if (p - base > size) + { + if constexpr (potentially_out_of_range) + { + return const_cast(default_value); + } + else + { + // Out of range null should + // still return the default value. + if (p == 0) + return const_cast(default_value); + PAL::error("Internal error: Pagemap read access out of range."); + } + } + p = p - base; + } + + // If this is potentially_out_of_range, then the pages will not have + // been mapped. With Lazy commit they will at least be mapped read-only + // Note that: this means external pointer on Windows will be slow. + if constexpr (potentially_out_of_range && !pal_supports) + { + register_range(p, 1); + } + + if constexpr (potentially_out_of_range) + return body_opt[p >> SHIFT]; + else + return body[p >> SHIFT]; + } + + /** + * Get a constant reference to the slot of this pagemap corresponding to a + * particular address. + * + * If the location has not been used before, then + * `potentially_out_of_range` should be set to true. This will ensure + * there is memory backing any reads through the returned reference. + */ + template + const T& get(address_t p) + { + return get_mut(p); + } + + /** + * Check if the pagemap has been initialised. + */ + [[nodiscard]] bool is_initialised() const + { + return body_opt != nullptr; + } + + /** + * Return the starting address corresponding to a given entry within the + * Pagemap. Also checks that the reference actually points to a valid entry. 
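// --------------------------------------------------------------------------
// Toy bounded pagemap (invented for illustration, not snmalloc's
// FlatPagemap): one entry of type T per 2^GRANULARITY_BITS bytes of a
// [base, base + size) range, showing the index arithmetic behind
// get_mut/set and the inverse mapping described above for get_address.
// --------------------------------------------------------------------------
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

template<size_t GRANULARITY_BITS, typename T>
class ToyPagemap
{
  uintptr_t base;
  std::vector<T> body;

public:
  ToyPagemap(uintptr_t base, size_t size)
  : base(base), body(size >> GRANULARITY_BITS)
  {}

  T& get_mut(uintptr_t p)
  {
    // One slot per granule, as in `body[p >> SHIFT]` above.
    return body[(p - base) >> GRANULARITY_BITS];
  }

  // First address covered by a given entry.
  uintptr_t get_address(const T& t) const
  {
    auto index = static_cast<uintptr_t>(&t - body.data());
    return base + (index << GRANULARITY_BITS);
  }
};

inline void toy_pagemap_example()
{
  // 14-bit granules, i.e. one entry per 16 KiB of a 1 MiB range.
  ToyPagemap<14, int> pm(0x10000000, 1 << 20);
  pm.get_mut(0x10004000) = 42;
  assert(pm.get_address(pm.get_mut(0x10004000)) == 0x10004000);
}
// --------------------------------------------------------------------------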
+ */ + [[nodiscard]] address_t get_address(const T& t) const + { + SNMALLOC_ASSERT(is_initialised()); + address_t entry_offset = address_cast(&t) - address_cast(body); + address_t entry_index = entry_offset / sizeof(T); + SNMALLOC_ASSERT( + entry_offset % sizeof(T) == 0 && entry_index < num_entries()); + return base + (entry_index << GRANULARITY_BITS); + } + + void set(address_t p, const T& t) + { + SNMALLOC_ASSERT(is_initialised()); +#ifdef SNMALLOC_TRACING + message<1024>("Pagemap.Set {}", p); +#endif + if constexpr (has_bounds) + { + if (p - base > size) + { + PAL::error("Internal error: Pagemap write access out of range."); + } + p = p - base; + } + + body[p >> SHIFT] = t; + } + }; +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h index f1dc4ffd6454..b82ee846e318 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/bits.h @@ -96,14 +96,14 @@ namespace snmalloc #endif } - inline constexpr size_t rotr_const(size_t x, size_t n) + constexpr size_t rotr_const(size_t x, size_t n) { size_t nn = n & (BITS - 1); return (x >> nn) | (x << ((static_cast(-static_cast(nn))) & (BITS - 1))); } - inline constexpr size_t rotl_const(size_t x, size_t n) + constexpr size_t rotl_const(size_t x, size_t n) { size_t nn = n & (BITS - 1); return (x << nn) | @@ -260,12 +260,12 @@ namespace snmalloc return one_at_bit(BITS - (clz_const(x + 1) + 1)); } - inline constexpr size_t next_pow2_bits_const(size_t x) + constexpr size_t next_pow2_bits_const(size_t x) { return BITS - clz_const(x - 1); } - inline constexpr SNMALLOC_FAST_PATH size_t + constexpr SNMALLOC_FAST_PATH size_t align_down(size_t value, size_t alignment) { SNMALLOC_ASSERT(is_pow2(alignment)); @@ -275,8 +275,7 @@ namespace snmalloc return value; } - inline constexpr SNMALLOC_FAST_PATH size_t - align_up(size_t value, size_t alignment) + constexpr SNMALLOC_FAST_PATH size_t align_up(size_t value, size_t alignment) { SNMALLOC_ASSERT(is_pow2(alignment)); @@ -331,7 +330,7 @@ namespace snmalloc } template - constexpr static size_t to_exp_mant_const(size_t value) + constexpr size_t to_exp_mant_const(size_t value) { constexpr size_t LEADING_BIT = one_at_bit(MANTISSA_BITS + LOW_BITS) >> 1; constexpr size_t MANTISSA_MASK = one_at_bit(MANTISSA_BITS) - 1; @@ -347,7 +346,7 @@ namespace snmalloc } template - constexpr static size_t from_exp_mant(size_t m_e) + constexpr size_t from_exp_mant(size_t m_e) { if (MANTISSA_BITS > 0) { @@ -371,7 +370,7 @@ namespace snmalloc * We write our own to reduce the code that potentially needs reviewing. */ template - constexpr inline T min(T t1, T t2) + constexpr T min(T t1, T t2) { return t1 < t2 ? t1 : t2; } @@ -383,7 +382,7 @@ namespace snmalloc * We write our own to reduce the code that potentially needs reviewing. */ template - constexpr inline T max(T t1, T t2) + constexpr T max(T t1, T t2) { return t1 > t2 ? 
t1 : t2; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h index 869310d6e707..2de53be036e8 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/defines.h @@ -27,6 +27,11 @@ # define SNMALLOC_REQUIRE_CONSTINIT # define SNMALLOC_UNUSED_FUNCTION # define SNMALLOC_USED_FUNCTION +# ifdef SNMALLOC_USE_CXX17 +# define SNMALLOC_NO_UNIQUE_ADDRESS +# else +# define SNMALLOC_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] +# endif #else # define SNMALLOC_FAST_FAIL() __builtin_trap() # define SNMALLOC_LIKELY(x) __builtin_expect(!!(x), 1) @@ -50,6 +55,11 @@ # define SNMALLOC_COLD __attribute__((cold)) # define SNMALLOC_UNUSED_FUNCTION __attribute((unused)) # define SNMALLOC_USED_FUNCTION __attribute((used)) +# ifdef SNMALLOC_USE_CXX17 +# define SNMALLOC_NO_UNIQUE_ADDRESS +# else +# define SNMALLOC_NO_UNIQUE_ADDRESS [[no_unique_address]] +# endif # ifdef __clang__ # define SNMALLOC_REQUIRE_CONSTINIT \ [[clang::require_constant_initialization]] @@ -109,6 +119,15 @@ namespace snmalloc #define TOSTRING(expr) TOSTRING2(expr) #define TOSTRING2(expr) #expr +#ifdef __cpp_lib_source_location +# include +# define SNMALLOC_CURRENT_LINE std::source_location::current().line() +# define SNMALLOC_CURRENT_FILE std::source_location::current().file_name() +#else +# define SNMALLOC_CURRENT_LINE TOSTRING(__LINE__) +# define SNMALLOC_CURRENT_FILE __FILE__ +#endif + #ifdef NDEBUG # define SNMALLOC_ASSERT_MSG(...) \ {} @@ -121,8 +140,8 @@ namespace snmalloc snmalloc::report_fatal_error( \ "assert fail: {} in {} on {} " fmt "\n", \ #expr, \ - __FILE__, \ - TOSTRING(__LINE__), \ + SNMALLOC_CURRENT_FILE, \ + SNMALLOC_CURRENT_LINE, \ ##__VA_ARGS__); \ } \ } while (0) @@ -137,8 +156,8 @@ namespace snmalloc snmalloc::report_fatal_error( \ "Check fail: {} in {} on {} " fmt "\n", \ #expr, \ - __FILE__, \ - TOSTRING(__LINE__), \ + SNMALLOC_CURRENT_FILE, \ + SNMALLOC_CURRENT_LINE, \ ##__VA_ARGS__); \ } \ } while (0) @@ -166,39 +185,6 @@ namespace snmalloc namespace snmalloc { - template - SNMALLOC_FAST_PATH_INLINE void UNUSED(Args&&...) - {} - - inline SNMALLOC_FAST_PATH void check_client_error(const char* const str) - { - //[[clang::musttail]] - return snmalloc::error(str); - } - - inline SNMALLOC_FAST_PATH void - check_client_impl(bool test, const char* const str) - { - if (SNMALLOC_UNLIKELY(!test)) - { - if constexpr (DEBUG) - { - UNUSED(str); - SNMALLOC_FAST_FAIL(); - } - else - { - check_client_error(str); - } - } - } - -#ifdef SNMALLOC_CHECK_CLIENT - static constexpr bool CHECK_CLIENT = true; -#else - static constexpr bool CHECK_CLIENT = false; -#endif - /** * Forward declaration so that this can be called before the pal header is * included. @@ -212,11 +198,8 @@ namespace snmalloc */ template inline void message(Args... args); -} // namespace snmalloc -#ifdef SNMALLOC_CHECK_CLIENT -# define snmalloc_check_client(test, str) \ - snmalloc::check_client_impl(test, str) -#else -# define snmalloc_check_client(test, str) -#endif + template + SNMALLOC_FAST_PATH_INLINE void UNUSED(Args&&...) 
+ {} +} // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ds_core.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ds_core.h index 672f7d1b0f14..2083190bc5dc 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ds_core.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ds_core.h @@ -11,6 +11,7 @@ #include "concept.h" #include "defines.h" #include "helpers.h" +#include "mitigations.h" #include "ptrwrap.h" #include "redblacktree.h" #include "seqset.h" diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h index 693b734e327e..61fcee9545d6 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/helpers.h @@ -37,7 +37,7 @@ namespace snmalloc } }; -#ifdef SNMALLOC_CHECK_CLIENT +#ifdef SNMALLOC_CHECK_CLIENT // TODO is this used/helpful? template class ModArray { @@ -279,6 +279,30 @@ namespace snmalloc } } + /* + * TODO: This is not quite the right thing we want to check, but it + * suffices on all currently-supported platforms and CHERI. We'd rather + * compare UINTPTR_WIDTH and ULLONG_WIDTH, I think, but those don't + * exist until C's FP Ext 1 TS (merged into C2x). + */ +#ifdef __CHERI_PURE_CAPABILITY__ + /** + * Append an intptr_t to the buffer as a hex string + */ + void append(intptr_t v) + { + append(reinterpret_cast(v)); + } + + /** + * Append a uintptr_t to the buffer as a hex string + */ + void append(uintptr_t v) + { + append(reinterpret_cast(v)); + } +#endif + /** * Append a raw pointer to the buffer as a hex string. */ @@ -298,7 +322,7 @@ namespace snmalloc append_char('-'); s = 0 - s; } - std::array buf; + std::array buf{{0}}; const char digits[] = "0123456789"; for (long i = long(buf.size() - 1); i >= 0; i--) { @@ -328,7 +352,7 @@ namespace snmalloc { append_char('0'); append_char('x'); - std::array buf; + std::array buf{{0}}; const char hexdigits[] = "0123456789abcdef"; // Length of string including null terminator static_assert(sizeof(hexdigits) == 0x11); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h new file mode 100644 index 000000000000..88547dcc7de4 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/mitigations.h @@ -0,0 +1,263 @@ +#pragma once +#include "defines.h" + +#include + +namespace snmalloc +{ + template + inline SNMALLOC_FAST_PATH void + check_client_error(const char* const str, Args... args) + { + //[[clang::musttail]] + return snmalloc::report_fatal_error(str, args...); + } + + template + inline SNMALLOC_FAST_PATH void + check_client_impl(bool test, const char* const str, Args... 
args) + { + if (SNMALLOC_UNLIKELY(!test)) + { + if constexpr (!DEBUG) + { + UNUSED(str, args...); + SNMALLOC_FAST_FAIL(); + } + else + { + check_client_error(str, args...); + } + } + } + +#ifdef SNMALLOC_CHECK_CLIENT + static constexpr bool CHECK_CLIENT = true; +#else + static constexpr bool CHECK_CLIENT = false; +#endif + + namespace mitigation + { + class type + { + size_t mask; + + public: + constexpr type(size_t f) : mask(f){}; + constexpr type(const type& t) = default; + + constexpr type operator+(const type b) const + { + return {mask | b.mask}; + } + + constexpr type operator-(const type b) const + { + return {mask & ~(b.mask)}; + } + + constexpr bool operator()(const type a) const + { + return (mask & a.mask) != 0; + } + }; + } // namespace mitigation + + /** + * Randomize the location of the pagemap within a larger address space + * allocation. The other pages in that allocation may fault if accessed, on + * platforms that can efficiently express such configurations. + * + * This guards against adversarial attempts to access the pagemap. + * + * This is unnecessary on StrictProvenance architectures. + */ + constexpr mitigation::type random_pagemap{1 << 0}; + /** + * Ensure that every slab (especially slabs used for larger "small" size + * classes) has a larger minimum number of objects and that a larger + * percentage of objects in a slab must be free to awaken the slab. + * + * This should frustrate use-after-reallocation attacks by delaying reuse. + * When combined with random_preserve, below, it additionally ensures that at + * least some shuffling of free objects is possible, and, hence, that there + * is at least some unpredictability of reuse. + * + * TODO: should this be split? mjp: Would require changing some thresholds. + * The min waking count needs to be ensure we have enough objects on a slab, + * hence is related to the min count on a slab. Currently we without this, we + * have min count of slab of 16, and a min waking count with this enabled + * of 32. So we would leak memory. + */ + constexpr mitigation::type random_larger_thresholds{1 << 1}; + /** + * + * Obfuscate forward-edge pointers in intra-slab free lists. + * + * This helps prevent a UAF write from re-pointing the free list arbitrarily, + * as the de-obfuscation of a corrupted pointer will generate a wild address. + * + * This is not available on StrictProvenance architectures. + */ + constexpr mitigation::type freelist_forward_edge{1 << 2}; + /** + * Store obfuscated backward-edge addresses in intra-slab free lists. + * + * Ordinarily, these lists are singly-linked. Storing backward-edges allows + * the allocator to verify the well-formedness of the links and, importantly, + * the acyclicity of the list itself. These backward-edges are also + * obfuscated in an attempt to frustrate an attacker armed with UAF + * attempting to construct a new well-formed list. + * + * Because the backward-edges are not traversed, this is available on + * StrictProvenance architectures, unlike freelist_forward_edge. + * + * This is required to detect double frees as it will break the doubly linked + * nature of the free list. + */ + constexpr mitigation::type freelist_backward_edge{1 << 3}; + /** + * When de-purposing a slab (releasing its address space for reuse at a + * different size class or allocation), walk the free list and validate the + * domestication of all nodes along it. + * + * If freelist_forward_edge is also enabled, this will probe the + * domestication status of the de-obfuscated pointers before traversal. 
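// --------------------------------------------------------------------------
// Reduced sketch (invented names, not part of the patch) of the set algebra
// provided by mitigation::type above: sets are combined with +, removed
// with -, and membership is tested by calling the set with a single flag,
// all at compile time.
// --------------------------------------------------------------------------
#include <cstddef>

namespace example
{
  class flag_set
  {
    size_t mask;

  public:
    constexpr flag_set(size_t f) : mask(f) {}

    constexpr flag_set operator+(flag_set b) const { return {mask | b.mask}; }
    constexpr flag_set operator-(flag_set b) const { return {mask & ~b.mask}; }
    constexpr bool operator()(flag_set f) const { return (mask & f.mask) != 0; }
  };

  constexpr flag_set clear_meta{1 << 0};
  constexpr flag_set sanity_checks{1 << 1};

  constexpr flag_set enabled = clear_meta + sanity_checks;
  static_assert(enabled(clear_meta));
  static_assert(!(enabled - clear_meta)(clear_meta));
} // namespace example
// --------------------------------------------------------------------------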
+ * Each of domestication and traversal may probabilistically catch UAF + * corruption of the free list. + * + * If freelist_backward_edge is also enabled, this will verify the integrity + * of the free list links. + * + * This gives the allocator "one last chance" to catch UAF corruption of a + * slab's free list before the slab is de-purposed. + * + * This is required to comprehensively detect double free. + */ + constexpr mitigation::type freelist_teardown_validate{1 << 4}; + /** + * When initializing a slab, shuffle its free list. + * + * This guards against attackers relying on object-adjacency or address-reuse + * properties of the allocation stream. + */ + constexpr mitigation::type random_initial{1 << 5}; + /** + * When a slab is operating, randomly assign freed objects to one of two + * intra-slab free lists. When selecting a slab's free list for allocations, + * select the longer of the two. + * + * This guards against attackers relying on object-adjacency or address-reuse + * properties of the allocation stream. + */ + constexpr mitigation::type random_preserve{1 << 6}; + /** + * Randomly introduce another slab for a given size-class, rather than use + * the last available to an allocator. + * + * This guards against attackers relying on address-reuse, especially in the + * pathological case of a size-class having only one slab with free entries. + */ + constexpr mitigation::type random_extra_slab{1 << 7}; + /** + * Use a LIFO queue, rather than a stack, of slabs with free elements. + * + * This generally increases the time between address reuse. + */ + constexpr mitigation::type reuse_LIFO{1 << 8}; + /** + * This performs a variety of inexpensive "sanity" tests throughout the + * allocator: + * + * - Requests to free objects must + * - not be interior pointers + * - be of allocated address space + * - Requests to free objects which also specify the size must specify a size + * that agrees with the current allocation. + * + * This guards gainst various forms of client misbehavior. + * + * TODO: Should this be split? mjp: It could, but let's not do this until + * we have performance numbers to see what this costs. + */ + constexpr mitigation::type sanity_checks{1 << 9}; + /** + * On CHERI, perform a series of well-formedness tests on capabilities given + * when requesting to free an object. + */ + constexpr mitigation::type cheri_checks{1 << 10}; + /** + * Erase intra-slab free list metadata before completing an allocation. + * + * This mitigates information disclosure. + */ + constexpr mitigation::type clear_meta{1 << 11}; + /** + * Protect meta data blocks by allocating separate from chunks for + * user allocations. This involves leaving gaps in address space. + * This is less efficient, so should only be applied for the checked + * build. + */ + constexpr mitigation::type metadata_protection{1 << 12}; + /** + * If this mitigation is enabled, then Pal implementations should provide + * exceptions/segfaults if accesses do not obey the + * - using + * - using_readonly + * - not_using + * model. 
+ */ + static constexpr mitigation::type pal_enforce_access{1 << 13}; + + constexpr mitigation::type full_checks = random_pagemap + + random_larger_thresholds + freelist_forward_edge + freelist_backward_edge + + freelist_teardown_validate + random_initial + random_preserve + + metadata_protection + random_extra_slab + reuse_LIFO + sanity_checks + + clear_meta + pal_enforce_access; + + constexpr mitigation::type no_checks{0}; + + using namespace mitigation; + constexpr mitigation::type mitigations = +#ifdef SNMALLOC_CHECK_CLIENT_MITIGATIONS + no_checks + SNMALLOC_CHECK_CLIENT_MITIGATIONS; +#elif defined(OPEN_ENCLAVE) + /** + * On Open Enclave the address space is limited, so we disable + * metadata-protection feature. + */ + CHECK_CLIENT ? full_checks - metadata_protection - random_pagemap : + no_checks; +#elif defined(__NetBSD__) + /** + * pal_enforce_access was failing on NetBSD, so we disable it. + */ + CHECK_CLIENT ? full_checks - pal_enforce_access : no_checks; +#elif defined(__CHERI_PURE_CAPABILITY__) + CHECK_CLIENT ? + /** + * freelist_forward_edge should not be used on CHERI as we cannot encode + * pointers as the tag will be destroyed. + * + * TODO: There is a known bug in CheriBSD that means round-tripping through + * PROT_NONE sheds capability load and store permissions (while restoring + * data read/write, for added excitement). For the moment, just force this + * down on CHERI. + */ + full_checks + cheri_checks + clear_meta - freelist_forward_edge - + pal_enforce_access : + /** + * clear_meta is important on CHERI to avoid leaking capabilities. + */ + sanity_checks + cheri_checks + clear_meta; +#else + CHECK_CLIENT ? full_checks : no_checks; +#endif +} // namespace snmalloc + +#define snmalloc_check_client(mitigation, test, str, ...) \ + if constexpr (mitigation) \ + { \ + snmalloc::check_client_impl(test, str, ##__VA_ARGS__); \ + } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ptrwrap.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ptrwrap.h index e7aa85c9a3e6..ae8fef9e2f40 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ptrwrap.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/ptrwrap.h @@ -67,6 +67,11 @@ namespace snmalloc * Bounded to one or more particular chunk granules */ Chunk, + /** + * Unbounded return from the kernel. These correspond, on CHERI + * platforms, to kernel-side address space reservations. + */ + Arena }; /** @@ -154,6 +159,11 @@ namespace snmalloc (S == dimension::Spatial::Alloc && AS == dimension::AddressSpaceControl::User), "Wild pointers must be annotated as tightly bounded"); + static_assert( + (S != dimension::Spatial::Arena) || + (W == dimension::Wildness::Tame && + AS == dimension::AddressSpaceControl::Full), + "Arena pointers must be restricted spatially before other dimensions"); }; // clang-format off @@ -165,7 +175,7 @@ namespace snmalloc * with that spelling. Both seem happy with this formulation. */ template - concept ConceptBound = + concept IsBound = ConceptSame && ConceptSame && @@ -180,8 +190,16 @@ namespace snmalloc namespace bounds { /** - * Internal access to a Chunk of memory. These flow between the ASM and - * the slab allocators, for example. + * Internal access to an entire Arena. These exist only in the backend. + */ + using Arena = bound< + dimension::Spatial::Arena, + dimension::AddressSpaceControl::Full, + dimension::Wildness::Tame>; + + /** + * Internal access to a Chunk of memory. These flow across the boundary + * between back- and front-ends, for example. 
*/ using Chunk = bound< dimension::Spatial::Chunk, @@ -218,7 +236,7 @@ namespace snmalloc * annotation. This is used by the PAL's capptr_to_user_address_control * function to compute its return value's annotation. */ - template + template using user_address_control_type = typename B::template with_address_space_control< dimension::AddressSpaceControl::User>; @@ -228,8 +246,8 @@ namespace snmalloc * Chunk and ChunkD are considered eqivalent here. */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BI, - SNMALLOC_CONCEPT(capptr::ConceptBound) BO> + SNMALLOC_CONCEPT(capptr::IsBound) BI, + SNMALLOC_CONCEPT(capptr::IsBound) BO> SNMALLOC_CONSTEVAL bool is_spatial_refinement() { if (BI::address_space_control != BO::address_space_control) @@ -242,15 +260,7 @@ namespace snmalloc return false; } - switch (BI::spatial) - { - using namespace capptr::dimension; - case Spatial::Chunk: - return true; - - case Spatial::Alloc: - return BO::spatial == Spatial::Alloc; - } + return BO::spatial <= BI::spatial; } } // namespace capptr @@ -258,7 +268,7 @@ namespace snmalloc * A pointer annotated with a "phantom type parameter" carrying a static * summary of its StrictProvenance metadata. */ - template + template class CapPtr { T* unsafe_capptr; @@ -273,6 +283,7 @@ namespace snmalloc constexpr SNMALLOC_FAST_PATH CapPtr() : CapPtr(nullptr) {} + private: /** * all other constructions must be explicit * @@ -292,13 +303,25 @@ namespace snmalloc # pragma warning(pop) #endif + public: + /** + * The CapPtr constructor is not sufficiently intimidating, given that it + * can be used to break annotation correctness. Expose it with a better + * name. + */ + static constexpr SNMALLOC_FAST_PATH CapPtr unsafe_from(T* p) + { + return CapPtr(p); + } + /** * Allow static_cast<>-s that preserve bounds but vary the target type. */ template [[nodiscard]] SNMALLOC_FAST_PATH CapPtr as_static() const { - return CapPtr(static_cast(this->unsafe_capptr)); + return CapPtr::unsafe_from( + static_cast(this->unsafe_capptr)); } [[nodiscard]] SNMALLOC_FAST_PATH CapPtr as_void() const @@ -312,7 +335,8 @@ namespace snmalloc template [[nodiscard]] SNMALLOC_FAST_PATH CapPtr as_reinterpret() const { - return CapPtr(reinterpret_cast(this->unsafe_capptr)); + return CapPtr::unsafe_from( + reinterpret_cast(this->unsafe_capptr)); } SNMALLOC_FAST_PATH bool operator==(const CapPtr& rhs) const @@ -355,6 +379,9 @@ namespace snmalloc * Aliases for CapPtr<> types with particular bounds. */ + template + using Arena = CapPtr; + template using Chunk = CapPtr; @@ -383,7 +410,7 @@ namespace snmalloc inline SNMALLOC_FAST_PATH capptr::Alloc capptr_chunk_is_alloc(capptr::ChunkUser p) { - return capptr::Alloc(p.unsafe_ptr()); + return capptr::Alloc::unsafe_from(p.unsafe_ptr()); } /** @@ -396,16 +423,6 @@ namespace snmalloc return p.unsafe_ptr(); } - /** - * Like capptr_reveal, but sometimes we do mean to reveal wild pointers - * (specifically in external_pointer, where we're revealing something - * architecturally derived from a user pointer). - */ - inline SNMALLOC_FAST_PATH void* capptr_reveal_wild(capptr::AllocWild p) - { - return p.unsafe_ptr(); - } - /** * Given a void* from the client, it's fine to call it AllocWild. * Roughly dual to capptr_reveal(). @@ -413,13 +430,13 @@ namespace snmalloc static inline SNMALLOC_FAST_PATH capptr::AllocWild capptr_from_client(void* p) { - return capptr::AllocWild(p); + return capptr::AllocWild::unsafe_from(p); } /** * It's safe to mark any CapPtr as Wild. 
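// --------------------------------------------------------------------------
// Sketch (not from the sources) of the construction pattern adopted for
// CapPtr above: the raw-pointer constructor is private and only reachable
// through a deliberately alarming factory name, so every place that mints
// an annotated pointer from a bare one is easy to grep for and audit.
// --------------------------------------------------------------------------
template<typename T>
class AnnotatedPtr
{
  T* p;

  constexpr explicit AnnotatedPtr(T* p) : p(p) {}

public:
  static constexpr AnnotatedPtr unsafe_from(T* p)
  {
    return AnnotatedPtr(p);
  }

  constexpr T* unsafe_ptr() const
  {
    return p;
  }
};

inline int* annotated_ptr_example(int* raw)
{
  // The conversion point is explicit and searchable.
  auto q = AnnotatedPtr<int>::unsafe_from(raw);
  return q.unsafe_ptr();
}
// --------------------------------------------------------------------------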
*/ - template + template static inline SNMALLOC_FAST_PATH CapPtr< T, typename B::template with_wildness> @@ -427,8 +444,8 @@ namespace snmalloc { return CapPtr< T, - typename B::template with_wildness>( - p.unsafe_ptr()); + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); } /** @@ -440,7 +457,7 @@ namespace snmalloc * annotations around an un-annotated std::atomic, to appease C++, yet * will expose or consume only CapPtr with the same bounds annotation. */ - template + template class AtomicCapPtr { std::atomic unsafe_capptr; @@ -453,6 +470,11 @@ namespace snmalloc : unsafe_capptr(n) {} + /** + * default to nullptr + */ + constexpr SNMALLOC_FAST_PATH AtomicCapPtr() : AtomicCapPtr(nullptr) {} + /** * Interconversion with CapPtr */ @@ -477,7 +499,7 @@ namespace snmalloc SNMALLOC_FAST_PATH CapPtr load(std::memory_order order = std::memory_order_seq_cst) noexcept { - return CapPtr(this->unsafe_capptr.load(order)); + return CapPtr::unsafe_from(this->unsafe_capptr.load(order)); } SNMALLOC_FAST_PATH void store( @@ -491,7 +513,7 @@ namespace snmalloc CapPtr desired, std::memory_order order = std::memory_order_seq_cst) noexcept { - return CapPtr( + return CapPtr::unsafe_from( this->unsafe_capptr.exchange(desired.unsafe_ptr(), order)); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h index 0d684698d195..df1fb9410661 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/redblacktree.h @@ -730,9 +730,14 @@ namespace snmalloc invariant(); } + bool is_empty() + { + return get_root().is_null(); + } + K remove_min() { - if (get_root().is_null()) + if (is_empty()) return Rep::null; auto path = get_root_path(); @@ -748,7 +753,7 @@ namespace snmalloc bool remove_elem(K value) { - if (get_root().is_null()) + if (is_empty()) return false; auto path = get_root_path(); diff --git a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h index 22a0fcd0760b..600ec07df0a5 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/ds_core/seqset.h @@ -1,5 +1,6 @@ #pragma once +#include "../aal/aal.h" #include "../ds_core/ds_core.h" #include @@ -10,153 +11,155 @@ namespace snmalloc /** * Simple sequential set of T. * - * Linked using the T::next field. + * Implemented as a doubly linked cyclic list. + * Linked using the T::node field. * * Can be used in either Fifo or Lifo mode, which is - * specified by template parameter. + * specified by template parameter to `pop`. */ - template + template class SeqSet { + public: /** - * This sequence structure is intrusive, in that it requires the use of a - * `next` field in the elements it manages, but, unlike some other intrusive - * designs, it does not require the use of a `container_of`-like construct, - * because its pointers point to the element, not merely the intrusive - * member. - * - * In some cases, the next pointer is provided by a superclass but the list - * is templated over the subclass. The `SeqSet` enforces the invariant that - * only instances of the subclass can be added to the list and so can safely - * down-cast the type of `.next` to `T*`. As such, we require only that the - * `next` field is a pointer to `T` or some superclass of `T`. - * %{ + * The doubly linked Node. 
*/ - using NextPtr = decltype(std::declval().next); - static_assert( - std::is_base_of_v, T>, - "T->next must be a queue pointer to T"); - ///@} + class Node + { + Node* next; + Node* prev; + + friend class SeqSet; + + constexpr Node(Node* next, Node* prev) : next(next), prev(prev) {} + + public: + void invariant() + { + SNMALLOC_ASSERT(next != nullptr); + SNMALLOC_ASSERT(prev != nullptr); + SNMALLOC_ASSERT(next->prev == this); + SNMALLOC_ASSERT(prev->next == this); + } + + void remove() + { + invariant(); + next->invariant(); + prev->invariant(); + next->prev = prev; + prev->next = next; + next->invariant(); + prev->invariant(); + } + }; + + private: + // Cyclic doubly linked list (initially empty) + Node head{&head, &head}; /** - * Field representation for Fifo behaviour. + * Returns the containing object. */ - struct FieldFifo + T* containing(Node* n) { - NextPtr head{nullptr}; - }; + // We could use -static_cast(offsetof(T, node)) here but CHERI + // compiler complains. So we restrict to first entries only. + + static_assert(offsetof(T, node) == 0); + + return pointer_offset(n, 0); + } /** - * Field representation for Lifo behaviour. + * Gets the doubly linked node for the object. */ - struct FieldLifo + Node* get_node(T* t) { - NextPtr head{nullptr}; - NextPtr* end{&head}; - }; +#ifdef __CHERI_PURE_CAPABILITY__ + return &__builtin_no_change_bounds(t->node); +#else + return &(t->node); +#endif + } + public: /** - * Field indirection to actual representation. - * Different numbers of fields are required for the - * two behaviours. + * Empty queue */ - std::conditional_t v; + constexpr SeqSet() = default; /** * Check for empty */ SNMALLOC_FAST_PATH bool is_empty() { - if constexpr (Fifo) - { - return v.head == nullptr; - } - else - { - SNMALLOC_ASSERT(v.end != nullptr); - return &(v.head) == v.end; - } + static_assert( + std::is_same_v().node)>, + "T->node must be Node for T"); + head.invariant(); + return head.next == &head; } - public: /** - * Empty queue + * Remove an element from the queue + * + * Assumes queue is non-empty */ - constexpr SeqSet() = default; + SNMALLOC_FAST_PATH T* pop_front() + { + head.invariant(); + SNMALLOC_ASSERT(!this->is_empty()); + auto node = head.next; + node->remove(); + auto result = containing(node); + head.invariant(); + return result; + } /** * Remove an element from the queue * * Assumes queue is non-empty */ - SNMALLOC_FAST_PATH T* pop() + SNMALLOC_FAST_PATH T* pop_back() { + head.invariant(); SNMALLOC_ASSERT(!this->is_empty()); - auto result = v.head; - if constexpr (Fifo) - { - v.head = result->next; - } + auto node = head.prev; + node->remove(); + auto result = containing(node); + head.invariant(); + return result; + } + + template + SNMALLOC_FAST_PATH T* pop() + { + head.invariant(); + if constexpr (is_fifo) + return pop_front(); else - { - if (&(v.head->next) == v.end) - v.end = &(v.head); - else - v.head = v.head->next; - } - // This cast is safe if the ->next pointers in all of the objects in the - // list are managed by this class because object types are checked on - // insertion. - return static_cast(result); + return pop_back(); } /** - * Filter + * Applies `f` to all the elements in the set. * - * Removes all elements that f returns true for. - * If f returns true, then filter is not allowed to look at the - * object again, and f is responsible for its lifetime. + * `f` is allowed to remove the element from the set. */ template - SNMALLOC_FAST_PATH void filter(Fn&& f) + SNMALLOC_FAST_PATH void iterate(Fn&& f) { - // Check for empty case. 
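// --------------------------------------------------------------------------
// Self-contained sketch (invented types) of the containing() trick above:
// because the intrusive link is required to be the first member
// (offsetof == 0), the node address is the element address, so no
// container_of-style pointer arithmetic is needed -- which also keeps the
// CHERI compiler happy, as the comment above explains.
// --------------------------------------------------------------------------
#include <cassert>
#include <cstddef>

struct Link
{
  Link* next{nullptr};
  Link* prev{nullptr};
};

struct Element
{
  Link node; // must be the first member, as the static_assert above enforces
  int value{0};
};

inline Element* containing(Link* n)
{
  static_assert(offsetof(Element, node) == 0);
  return reinterpret_cast<Element*>(n);
}

inline void containing_example()
{
  Element e;
  e.value = 7;
  assert(containing(&e.node)->value == 7);
}
// --------------------------------------------------------------------------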
- if (is_empty()) - return; - - NextPtr* prev = &(v.head); + auto curr = head.next; + curr->invariant(); - while (true) + while (curr != &head) { - if constexpr (Fifo) - { - if (*prev == nullptr) - break; - } - - NextPtr curr = *prev; - // Note must read curr->next before calling `f` as `f` is allowed to - // mutate that field. - NextPtr next = curr->next; - if (f(static_cast(curr))) - { - // Remove element; - *prev = next; - } - else - { - // Keep element - prev = &(curr->next); - } - if constexpr (!Fifo) - { - if (&(curr->next) == v.end) - break; - } - } - if constexpr (!Fifo) - { - v.end = prev; + // Read next first, as f may remove curr. + auto next = curr->next; + f(containing(curr)); + curr = next; } } @@ -165,16 +168,16 @@ namespace snmalloc */ SNMALLOC_FAST_PATH void insert(T* item) { - if constexpr (Fifo) - { - item->next = v.head; - v.head = item; - } - else - { - *(v.end) = item; - v.end = &(item->next); - } + auto n = get_node(item); + + n->next = head.next; + head.next->prev = n; + + n->prev = &head; + head.next = n; + + n->invariant(); + head.invariant(); } /** @@ -182,7 +185,7 @@ namespace snmalloc */ SNMALLOC_FAST_PATH const T* peek() { - return static_cast(v.head); + return containing(head.next); } }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/bounds_checks.h b/3rdparty/exported/snmalloc/src/snmalloc/global/bounds_checks.h index 66b67ec43a7e..378f5439a8c5 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/bounds_checks.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/bounds_checks.h @@ -22,15 +22,16 @@ namespace snmalloc * error. With it set to false we print a helpful error message and then crash * the process. The process may be in an undefined state by the time the * check fails, so there are potentially security implications to turning this - * off. It defaults to true for debug builds, false for release builds and - * can be overridden by defining the macro `SNMALLOC_FAIL_FAST` to true or - * false. + * off. It defaults to false and can be overridden by defining the macro + * `SNMALLOC_FAIL_FAST` to true. + * + * Current default to true will help with adoption experience. */ static constexpr bool FailFast = #ifdef SNMALLOC_FAIL_FAST SNMALLOC_FAIL_FAST #else - !DEBUG + false #endif ; @@ -40,71 +41,63 @@ namespace snmalloc * `p` is the input pointer and `len` is the offset from this pointer of the * bounds. `msg` is the message that will be reported along with the * start and end of the real object's bounds. + * + * Note that this function never returns. We do not mark it [[NoReturn]] + * so as to generate better code, because [[NoReturn]] prevents tailcails + * in GCC and Clang. + * + * The function claims to return a FakeReturn, this is so it can be tail + * called where the bound checked function returns a value, for instance, in + * memcpy it is specialised to void*. */ - SNMALLOC_SLOW_PATH SNMALLOC_UNUSED_FUNCTION inline void - report_fatal_bounds_error [[noreturn]] ( - void* p, size_t len, const char* msg, decltype(ThreadAlloc::get())& alloc) - { - report_fatal_error( - "{}: {} is in allocation {}--{}, offset {} is past the end\n", - msg, - p, - alloc.template external_pointer(p), - alloc.template external_pointer(p), - len); - } - - /** - * The direction for a bounds check. 
- */ - enum class CheckDirection + template + SNMALLOC_SLOW_PATH SNMALLOC_UNUSED_FUNCTION inline FakeReturn + report_fatal_bounds_error(const void* ptr, size_t len, const char* msg) { - /** - * A read bounds check, performed only when read checks are enabled. - */ - Read, + if constexpr (FailFast) + { + UNUSED(ptr, len, msg); + SNMALLOC_FAST_FAIL(); + } + else + { + auto& alloc = ThreadAlloc::get(); + void* p = const_cast(ptr); - /** - * A write bounds check, performed unconditionally. - */ - Write - }; + auto range_end = pointer_offset(p, len); + auto object_end = alloc.template external_pointer(p); + report_fatal_error( + "Fatal Error!\n{}: \n\trange [{}, {})\n\tallocation [{}, " + "{})\nrange goes beyond allocation by {} bytes \n", + msg, + p, + range_end, + alloc.template external_pointer(p), + object_end, + pointer_diff(object_end, range_end)); + } + } /** * Check whether a pointer + length is in the same object as the pointer. - * Fail with the error message from the third argument if not. * - * The template parameter indicates whether this is a read. If so, this - * function is a no-op when `CheckReads` is false. + * Returns true if the checks succeeds. + * + * The template parameter indicates whether the check should be performed. It + * defaults to true. If it is false, the check will always succeed. */ - template< - CheckDirection Direction = CheckDirection::Write, - bool CheckBoth = CheckReads> - SNMALLOC_FAST_PATH_INLINE void - check_bounds(const void* ptr, size_t len, const char* msg = "") + template + SNMALLOC_FAST_PATH_INLINE bool check_bounds(const void* ptr, size_t len) { - if constexpr ((Direction == CheckDirection::Write) || CheckBoth) + if constexpr (PerformCheck) { auto& alloc = ThreadAlloc::get(); - void* p = const_cast(ptr); - - if (SNMALLOC_UNLIKELY(!alloc.check_bounds(ptr, len))) - { - if constexpr (FailFast) - { - UNUSED(p, len, msg); - SNMALLOC_FAST_FAIL(); - } - else - { - report_fatal_bounds_error(p, len, msg, alloc); - } - } + return alloc.check_bounds(ptr, len); } else { - UNUSED(ptr, len, msg); + UNUSED(ptr, len); + return true; } } - } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h b/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h index beba37cd2a5f..f4996f6097eb 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/global/memcpy.h @@ -159,8 +159,7 @@ namespace snmalloc std::max(sizeof(uint64_t), sizeof(void*)); /** - * Hook for architecture-specific optimisations. Does nothing in the - * default case. + * Hook for architecture-specific optimisations. */ static SNMALLOC_FAST_PATH_INLINE void copy(void* dst, const void* src, size_t len) @@ -179,6 +178,135 @@ namespace snmalloc } }; + /** + * StrictProvenance architectures are prickly about their pointers. In + * particular, they may not permit misaligned loads and stores of + * pointer-sized data, even if they can have non-pointers in their + * pointer registers. On the other hand, pointers might be hiding anywhere + * they are architecturally permitted! + */ + struct GenericStrictProvenance + { + static_assert(bits::is_pow2(sizeof(void*))); + /* + * It's not entirely clear what we would do if this were not the case. + * Best not think too hard about it now. 
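// --------------------------------------------------------------------------
// Sketch (invented names) of the FakeReturn pattern described above for
// report_fatal_bounds_error: the error path never returns, but it is not
// marked [[noreturn]] (which would inhibit tail calls in GCC and Clang) and
// it claims a caller-chosen return type so it can be tail-called from a
// function that returns a value, such as memcpy returning void*.
// --------------------------------------------------------------------------
#include <cstdio>
#include <cstdlib>

template<typename FakeReturn>
FakeReturn report_error(const char* msg)
{
  std::fprintf(stderr, "%s\n", msg);
  std::abort(); // never returns; the return type exists only for callers
}

inline void* checked_op(void* p, bool in_bounds)
{
  if (!in_bounds)
    return report_error<void*>("bounds violation"); // tail-callable
  return p;
}
// --------------------------------------------------------------------------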
+ */ + static_assert(alignof(void*) == sizeof(void*)); + + static constexpr size_t LargestRegisterSize = 16; + + static SNMALLOC_FAST_PATH_INLINE void + copy(void* dst, const void* src, size_t len) + { + /* + * As a function of misalignment relative to pointers, how big do we need + * to be such that the span could contain an aligned pointer? We'd need + * to be big enough to contain the pointer and would need an additional + * however many bytes it would take to get us up to alignment. That is, + * (sizeof(void*) - src_misalign) except in the case that src_misalign is + * 0, when the answer is 0, which we can get with some bit-twiddling. + * + * Below that threshold, just use a jump table to move bytes around. + */ + if ( + len < sizeof(void*) + + (static_cast(-static_cast(address_cast(src))) & + (alignof(void*) - 1))) + { + small_copies<2 * sizeof(void*) - 1, LargestRegisterSize>(dst, src, len); + } + /* + * Equally-misaligned segments could be holding pointers internally, + * assuming they're sufficiently large. In this case, perform unaligned + * operations at the top and bottom of the range. This check also + * suffices to include the case where both segments are + * alignof(void*)-aligned. + */ + else if ( + address_misalignment(address_cast(src)) == + address_misalignment(address_cast(dst))) + { + /* + * Find the buffers' ends. Do this before the unaligned_start so that + * there are fewer dependencies in the instruction stream; it would be + * functionally equivalent to do so below. + */ + auto dep = pointer_offset(dst, len); + auto sep = pointer_offset(src, len); + + /* + * Come up to alignof(void*)-alignment using a jump table. This + * operation will move no pointers, since it serves to get us up to + * alignof(void*). Recall that unaligned_start takes its arguments by + * reference, so they will be aligned hereafter. + */ + unaligned_start(dst, src, len); + + /* + * Move aligned pointer *pairs* for as long as we can (possibly none). + * This generates load-pair/store-pair operations where we have them, + * and should be benign where we don't, looking like just a bit of loop + * unrolling with two loads and stores. + */ + { + struct Ptr2 + { + void* p[2]; + }; + if (sizeof(Ptr2) <= len) + { + auto dp = static_cast(dst); + auto sp = static_cast(src); + for (size_t i = 0; i <= len - sizeof(Ptr2); i += sizeof(Ptr2)) + { + *dp++ = *sp++; + } + } + } + + /* + * After that copy loop, there can be at most one pointer-aligned and + * -sized region left. If there is one, copy it. + */ + len = len & (2 * sizeof(void*) - 1); + if (sizeof(void*) <= len) + { + ptrdiff_t o = -static_cast(sizeof(void*)); + auto dp = + pointer_align_down(pointer_offset_signed(dep, o)); + auto sp = + pointer_align_down(pointer_offset_signed(sep, o)); + *static_cast(dp) = *static_cast(sp); + } + + /* + * There are up to sizeof(void*)-1 bytes left at the end, aligned at + * alignof(void*). Figure out where and how many... + */ + len = len & (sizeof(void*) - 1); + dst = pointer_align_down(dep); + src = pointer_align_down(sep); + /* + * ... and use a jump table at the end, too. If we did the copy_end + * overlapping store backwards trick, we'd risk damaging the capability + * in the cell behind us. + */ + small_copies(dst, src, len); + } + /* + * Otherwise, we cannot use pointer-width operations because one of + * the load or store is going to be misaligned and so will trap. + * So, same dance, but with integer registers only. 
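// --------------------------------------------------------------------------
// Worked check (not from the sources) of the threshold used in the copy
// above: the smallest span that could contain an aligned pointer is
// sizeof(void*) plus however many bytes it takes to bring `src` up to
// pointer alignment, i.e. (-src) & (alignof(void*) - 1).
// --------------------------------------------------------------------------
#include <cstddef>
#include <cstdint>

constexpr size_t ptr_span_threshold(uintptr_t src)
{
  return sizeof(void*) +
    (static_cast<size_t>(-static_cast<ptrdiff_t>(src)) &
     (alignof(void*) - 1));
}

// On a hypothetical target with 8-byte, 8-aligned pointers:
static_assert(
  sizeof(void*) != 8 || ptr_span_threshold(0x1000) == 8);  // already aligned
static_assert(
  sizeof(void*) != 8 || ptr_span_threshold(0x1001) == 15); // 7 to realign + 8
// --------------------------------------------------------------------------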
+ */ + else + { + block_copy(dst, src, len); + copy_end(dst, src, len); + } + } + }; + #if defined(__x86_64__) || defined(_M_X64) /** * x86-64 architecture. Prefers SSE registers for small and medium copies @@ -288,7 +416,10 @@ namespace snmalloc #elif defined(__powerpc64__) PPC64Arch #else - GenericArch + std::conditional_t< + aal_supports, + GenericStrictProvenance, + GenericArch> #endif ; @@ -317,14 +448,13 @@ namespace snmalloc return dst; } - if constexpr (Checked) - { - // Check the bounds of the arguments. - check_bounds( - dst, len, "memcpy with destination out of bounds of heap allocation"); - check_bounds( + // Check the bounds of the arguments. + if (SNMALLOC_UNLIKELY(!check_bounds<(Checked && ReadsChecked)>(src, len))) + return report_fatal_bounds_error( src, len, "memcpy with source out of bounds of heap allocation"); - } + if (SNMALLOC_UNLIKELY(!check_bounds(dst, len))) + return report_fatal_bounds_error( + dst, len, "memcpy with destination out of bounds of heap allocation"); Arch::copy(dst, src, len); return orig_dst; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h index e29a2df8bd6f..f0ed3964df9d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_concept.h @@ -12,81 +12,94 @@ namespace snmalloc * get_metadata takes a boolean template parameter indicating whether it may * be accessing memory that is not known to be committed. */ - template - concept ConceptBackendMeta = - requires(address_t addr, size_t sz, const typename Meta::Entry& t) + template + concept IsReadablePagemap = + requires(address_t addr, size_t sz, const typename Pagemap::Entry& t) { { - Meta::template get_metaentry(addr) + Pagemap::template get_metaentry(addr) } - ->ConceptSame; + ->ConceptSame; { - Meta::template get_metaentry(addr) + Pagemap::template get_metaentry(addr) } - ->ConceptSame; + ->ConceptSame; }; /** - * The pagemap can also be told to commit backing storage for a range of - * addresses. This is broken out to a separate concept so that we can - * annotate which functions expect to do this vs. which merely use the core - * interface above. In practice, use ConceptBackendMetaRange (without the - * underscore) below, which combines this and the core concept, above. + * The core of the static pagemap accessor interface: {get,set}_metadata. + * + * get_metadata_mut takes a boolean template parameter indicating whether it + * may be accessing memory that is not known to be committed. + * + * set_metadata updates the entry in the pagemap. */ - template - concept ConceptBackendMeta_Range = requires(address_t addr, size_t sz) + template + concept IsWritablePagemap = IsReadablePagemap&& requires( + address_t addr, size_t sz, const typename Pagemap::Entry& t) { { - Meta::register_range(addr, sz) + Pagemap::template get_metaentry_mut(addr) } - ->ConceptSame; - }; + ->ConceptSame; + + { + Pagemap::template get_metaentry_mut(addr) + } + ->ConceptSame; - template - concept ConceptBuddyRangeMeta = - requires(address_t addr, size_t sz, const typename Meta::Entry& t) - { { - Meta::template get_metaentry_mut(addr) + Pagemap::set_metaentry(addr, sz, t) } - ->ConceptSame; + ->ConceptSame; + }; + /** + * The pagemap can also be told to commit backing storage for a range of + * addresses. This is broken out to a separate concept so that we can + * annotate which functions expect to do this vs. which merely use the core + * interface above. 
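// --------------------------------------------------------------------------
// Minimal C++20 sketch (invented names, std::same_as instead of snmalloc's
// ConceptSame) of how a "readable pagemap" style concept constrains a
// static accessor interface, in the spirit of IsReadablePagemap above.
// --------------------------------------------------------------------------
#include <concepts>
#include <cstdint>

template<typename Pagemap>
concept ToyReadablePagemap = requires(uintptr_t addr)
{
  { Pagemap::get_entry(addr) } -> std::same_as<const int&>;
};

struct ToyPagemapImpl
{
  static const int& get_entry(uintptr_t)
  {
    static int entry = 0;
    return entry;
  }
};

static_assert(ToyReadablePagemap<ToyPagemapImpl>);

// Callers constrain themselves on the concept rather than a concrete type.
template<ToyReadablePagemap P>
int lookup(uintptr_t addr)
{
  return P::get_entry(addr);
}
// --------------------------------------------------------------------------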
In practice, use IsWritablePagemapWithRegister below, + * which combines this and the core concept, above. + */ + template + concept IsPagemapWithRegister = requires(capptr::Arena p, size_t sz) + { { - Meta::template get_metaentry_mut(addr) + Pagemap::register_range(p, sz) } - ->ConceptSame; + ->ConceptSame; }; /** * The full pagemap accessor interface, with all of {get,set}_metadata and * register_range. Use this to annotate callers that need the full interface - * and use ConceptBackendMeta for callers that merely need {get,set}_metadata, + * and use IsReadablePagemap for callers that merely need {get,set}_metadata, * but note that the difference is just for humans and not compilers (since * concept checking is lower bounding and does not constrain the templatized * code to use only those affordances given by the concept). */ - template - concept ConceptBackendMetaRange = - ConceptBackendMeta&& ConceptBackendMeta_Range; + template + concept IsWritablePagemapWithRegister = + IsWritablePagemap&& IsPagemapWithRegister; /** - * The backend also defines domestication (that is, the difference between - * Tame and Wild CapPtr bounds). It exports the intended affordance for - * testing a Wild pointer and either returning nullptr or the original + * The configuration also defines domestication (that is, the difference + * between Tame and Wild CapPtr bounds). It exports the intended affordance + * for testing a Wild pointer and either returning nullptr or the original * pointer, now Tame. */ - template - concept ConceptBackendDomestication = - requires(typename Globals::LocalState* ls, capptr::AllocWild ptr) + template + concept IsConfigDomestication = + requires(typename Config::LocalState* ls, capptr::AllocWild ptr) { { - Globals::capptr_domesticate(ls, ptr) + Config::capptr_domesticate(ls, ptr) } ->ConceptSame>; { - Globals::capptr_domesticate(ls, ptr.template as_static()) + Config::capptr_domesticate(ls, ptr.template as_static()) } ->ConceptSame>; }; @@ -94,49 +107,92 @@ namespace snmalloc class CommonConfig; struct Flags; + template + concept IsBackend = + requires(LocalState& local_state, size_t size, uintptr_t ras) + { + { + Backend::alloc_chunk(local_state, size, ras) + } + ->ConceptSame< + std::pair, typename Backend::SlabMetadata*>>; + } + &&requires(LocalState* local_state, size_t size) + { + { + Backend::template alloc_meta_data(local_state, size) + } + ->ConceptSame>; + } + &&requires( + LocalState& local_state, + typename Backend::SlabMetadata& slab_metadata, + capptr::Alloc alloc, + size_t size) + { + { + Backend::dealloc_chunk(local_state, slab_metadata, alloc, size) + } + ->ConceptSame; + } + &&requires(address_t p) + { + { + Backend::template get_metaentry(p) + } + ->ConceptSame; + + { + Backend::template get_metaentry(p) + } + ->ConceptSame; + }; + /** - * Backend global objects of type T must obey a number of constraints. They + * Config objects of type T must obey a number of constraints. They * must... * * * inherit from CommonConfig (see commonconfig.h) * * specify which PAL is in use via T::Pal - * * have static pagemap accessors via T::Pagemap * * define a T::LocalState type (and alias it as T::Pagemap::LocalState) * * define T::Options of type snmalloc::Flags * * expose the global allocator pool via T::pool() if pool allocation is * used. 
* */ - template - concept ConceptBackendGlobals = - std::is_base_of::value&& - ConceptPAL&& - ConceptBackendMetaRange&& requires() + template + concept IsConfig = std::is_base_of::value&& + IsPAL&& IsBackend< + typename Config::LocalState, + typename Config::PagemapEntry, + typename Config::Backend>&& requires() { - typename Globals::LocalState; + typename Config::LocalState; + typename Config::Backend; + typename Config::PagemapEntry; { - Globals::Options + Config::Options } ->ConceptSameModRef; } &&( requires() { - Globals::Options.CoreAllocIsPoolAllocated == true; - typename Globals::GlobalPoolState; + Config::Options.CoreAllocIsPoolAllocated == true; + typename Config::GlobalPoolState; { - Globals::pool() + Config::pool() } - ->ConceptSame; + ->ConceptSame; } || - requires() { Globals::Options.CoreAllocIsPoolAllocated == false; }); + requires() { Config::Options.CoreAllocIsPoolAllocated == false; }); /** - * The lazy version of the above; please see ds/concept.h and use sparingly. + * The lazy version of the above; please see ds_core/concept.h and use + * sparingly. */ - template - concept ConceptBackendGlobalsLazy = - !is_type_complete_v || ConceptBackendGlobals; + template + concept IsConfigLazy = !is_type_complete_v || IsConfig; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_wrappers.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_wrappers.h index e26cc22ae679..a7f47db49fe6 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_wrappers.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/backend_wrappers.h @@ -40,20 +40,19 @@ namespace snmalloc * backend. Returns true if there is a function with correct name and type. */ template< - SNMALLOC_CONCEPT(ConceptBackendDomestication) Backend, + SNMALLOC_CONCEPT(IsConfigDomestication) Config, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) B> - constexpr SNMALLOC_FAST_PATH_INLINE auto has_domesticate(int) - -> std::enable_if_t< - std::is_same_v< - decltype(Backend::capptr_domesticate( - std::declval(), - std::declval>())), - CapPtr< - T, - typename B::template with_wildness< - capptr::dimension::Wildness::Tame>>>, - bool> + SNMALLOC_CONCEPT(capptr::IsBound) B> + constexpr SNMALLOC_FAST_PATH auto has_domesticate(int) -> std::enable_if_t< + std::is_same_v< + decltype(Config::capptr_domesticate( + std::declval(), + std::declval>())), + CapPtr< + T, + typename B::template with_wildness< + capptr::dimension::Wildness::Tame>>>, + bool> { return true; } @@ -63,47 +62,47 @@ namespace snmalloc * backend. Returns false in case where above template does not match. */ template< - SNMALLOC_CONCEPT(ConceptBackendGlobals) Backend, + SNMALLOC_CONCEPT(IsConfig) Config, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) B> - constexpr SNMALLOC_FAST_PATH_INLINE bool has_domesticate(long) + SNMALLOC_CONCEPT(capptr::IsBound) B> + constexpr SNMALLOC_FAST_PATH bool has_domesticate(long) { return false; } } // namespace detail /** - * Wrapper that calls `Backend::capptr_domesticate` if and only if - * Backend::Options.HasDomesticate is true. If it is not implemented then + * Wrapper that calls `Config::capptr_domesticate` if and only if + * Config::Options.HasDomesticate is true. If it is not implemented then * this assumes that any wild pointer can be domesticated. 
*/ template< - SNMALLOC_CONCEPT(ConceptBackendGlobals) Backend, + SNMALLOC_CONCEPT(IsConfig) Config, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) B> + SNMALLOC_CONCEPT(capptr::IsBound) B> SNMALLOC_FAST_PATH_INLINE auto - capptr_domesticate(typename Backend::LocalState* ls, CapPtr p) + capptr_domesticate(typename Config::LocalState* ls, CapPtr p) { static_assert( - !detail::has_domesticate(0) || - Backend::Options.HasDomesticate, + !detail::has_domesticate(0) || + Config::Options.HasDomesticate, "Back end provides domesticate function but opts out of using it "); static_assert( - detail::has_domesticate(0) || - !Backend::Options.HasDomesticate, + detail::has_domesticate(0) || + !Config::Options.HasDomesticate, "Back end does not provide capptr_domesticate and requests its use"); - if constexpr (Backend::Options.HasDomesticate) + if constexpr (Config::Options.HasDomesticate) { - return Backend::capptr_domesticate(ls, p); + return Config::capptr_domesticate(ls, p); } else { UNUSED(ls); return CapPtr< T, - typename B::template with_wildness>( - p.unsafe_ptr()); + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); } } } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h index 8aae67d453f8..c7fc79b72452 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/corealloc.h @@ -32,13 +32,13 @@ namespace snmalloc * provided externally, then it must be set explicitly with * `init_message_queue`. */ - template + template class CoreAllocator : public std::conditional_t< - Backend::Options.CoreAllocIsPoolAllocated, - Pooled>, + Config::Options.CoreAllocIsPoolAllocated, + Pooled>, Empty> { - template + template friend class LocalAllocator; /** @@ -46,8 +46,8 @@ namespace snmalloc * specialised for the back-end that we are using. * @{ */ - using BackendSlabMetadata = typename Backend::SlabMetadata; - using PagemapEntry = typename Backend::Pagemap::Entry; + using BackendSlabMetadata = typename Config::Backend::SlabMetadata; + using PagemapEntry = typename Config::PagemapEntry; /// }@ /** @@ -55,16 +55,17 @@ namespace snmalloc */ struct SlabMetadataCache { -#ifdef SNMALLOC_CHECK_CLIENT - SeqSet available; -#else - // This is slightly faster in some cases, - // but makes memory reuse more predictable. - SeqSet available; -#endif + SeqSet available{}; + uint16_t unused = 0; uint16_t length = 0; - } alloc_classes[NUM_SMALL_SIZECLASSES]; + } alloc_classes[NUM_SMALL_SIZECLASSES]{}; + + /** + * The set of all slabs and large allocations + * from this allocator that are full or almost full. + */ + SeqSet laden{}; /** * Local entropy source and current version of keys for @@ -77,7 +78,7 @@ namespace snmalloc * allocator */ std::conditional_t< - Backend::Options.IsQueueInline, + Config::Options.IsQueueInline, RemoteAllocator, RemoteAllocator*> remote_alloc; @@ -85,7 +86,7 @@ namespace snmalloc /** * The type used local state. This is defined by the back end. */ - using LocalState = typename Backend::LocalState; + using LocalState = typename Config::LocalState; /** * A local area of address space managed by this allocator. @@ -94,7 +95,7 @@ namespace snmalloc * externally. */ std::conditional_t< - Backend::Options.CoreAllocOwnsLocalState, + Config::Options.CoreAllocOwnsLocalState, LocalState, LocalState*> backend_state; @@ -108,7 +109,7 @@ namespace snmalloc /** * Ticker to query the clock regularly at a lower cost. 
*/ - Ticker ticker; + Ticker ticker; /** * The message queue needs to be accessible from other threads @@ -118,7 +119,7 @@ namespace snmalloc */ auto* public_state() { - if constexpr (Backend::Options.IsQueueInline) + if constexpr (Config::Options.IsQueueInline) { return &remote_alloc; } @@ -133,7 +134,7 @@ namespace snmalloc */ LocalState* backend_state_ptr() { - if constexpr (Backend::Options.CoreAllocOwnsLocalState) + if constexpr (Config::Options.CoreAllocOwnsLocalState) { return &backend_state; } @@ -169,41 +170,11 @@ namespace snmalloc /** * The message queue has non-trivial initialisation as it needs to - * be non-empty, so we prime it with a single allocation. + * be non-empty, so we prime it with a fake allocation. */ void init_message_queue() { - // Manufacture an allocation to prime the queue - // Using an actual allocation removes a conditional from a critical path. - auto dummy = capptr::Alloc(small_alloc_one(MIN_ALLOC_SIZE)) - .template as_static>(); - if (dummy == nullptr) - { - error("Critical error: Out-of-memory during initialisation."); - } - message_queue().init(dummy); - } - - /** - * There are a few internal corner cases where we need to allocate - * a small object. These are not on the fast path, - * - Allocating stub in the message queue - * Note this is not performance critical as very infrequently called. - */ - capptr::Alloc small_alloc_one(size_t size) - { - SNMALLOC_ASSERT(attached_cache != nullptr); - auto domesticate = - [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(backend_state_ptr(), p); - }; - // Use attached cache, and fill it if it is empty. - return attached_cache->template alloc( - domesticate, - size, - [&](smallsizeclass_t sizeclass, freelist::Iter<>* fl) { - return small_alloc(sizeclass, *fl); - }); + message_queue().init(); } static SNMALLOC_FAST_PATH void alloc_new_list( @@ -219,73 +190,78 @@ namespace snmalloc auto& b = meta->free_queue; -#ifdef SNMALLOC_CHECK_CLIENT - // Structure to represent the temporary list elements - struct PreAllocObject - { - capptr::AllocFull next; - }; - // The following code implements Sattolo's algorithm for generating - // random cyclic permutations. This implementation is in the opposite - // direction, so that the original space does not need initialising. This - // is described as outside-in without citation on Wikipedia, appears to be - // Folklore algorithm. - - // Note the wide bounds on curr relative to each of the ->next fields; - // curr is not persisted once the list is built. - capptr::Chunk curr = - pointer_offset(bumpptr, 0).template as_static(); - curr->next = Aal::capptr_bound( - curr, rsize); - - uint16_t count = 1; - for (curr = - pointer_offset(curr, rsize).template as_static(); - curr.as_void() < slab_end; - curr = - pointer_offset(curr, rsize).template as_static()) + if constexpr (mitigations(random_initial)) { - size_t insert_index = entropy.sample(count); - curr->next = std::exchange( - pointer_offset(bumpptr, insert_index * rsize) - .template as_static() - ->next, + // Structure to represent the temporary list elements + struct PreAllocObject + { + capptr::AllocFull next; + }; + // The following code implements Sattolo's algorithm for generating + // random cyclic permutations. This implementation is in the opposite + // direction, so that the original space does not need initialising. + // This is described as outside-in without citation on Wikipedia, + // appears to be Folklore algorithm. 
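// Editorial sketch (not part of the patch): the "outside-in" Sattolo
// construction described above, shown over plain indices instead of slab
// slots.  build_random_cycle and rng are illustrative names, not snmalloc
// APIs.  Element 0 starts as a self-loop; each later element i is spliced
// into the cycle after an already-placed element j chosen at random.  Slot i
// is never read before iteration i, which is why the underlying memory needs
// no initialising, and the result is a single cycle covering all n slots.
#include <cstddef>
#include <random>
#include <vector>

inline std::vector<size_t> build_random_cycle(size_t n, std::mt19937_64& rng)
{
  std::vector<size_t> next(n);
  next[0] = 0; // a one-element cycle
  for (size_t i = 1; i < n; i++)
  {
    size_t j = rng() % i; // one of the i elements already on the cycle
    next[i] = next[j];    // splice i in directly after j
    next[j] = i;
  }
  return next; // following next[] from any slot visits every slot exactly once
}
// The code below performs the same splice over objects in the slab, then picks
// a random entry point and walks the cycle to emit the free list.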
+ + // Note the wide bounds on curr relative to each of the ->next fields; + // curr is not persisted once the list is built. + capptr::Chunk curr = + pointer_offset(bumpptr, 0).template as_static(); + curr->next = Aal::capptr_bound( - curr, rsize)); - count++; - } + curr, rsize); + + uint16_t count = 1; + for (curr = + pointer_offset(curr, rsize).template as_static(); + curr.as_void() < slab_end; + curr = + pointer_offset(curr, rsize).template as_static()) + { + size_t insert_index = entropy.sample(count); + curr->next = std::exchange( + pointer_offset(bumpptr, insert_index * rsize) + .template as_static() + ->next, + Aal::capptr_bound( + curr, rsize)); + count++; + } - // Pick entry into space, and then build linked list by traversing cycle - // to the start. Use ->next to jump from Chunk to Alloc. - auto start_index = entropy.sample(count); - auto start_ptr = pointer_offset(bumpptr, start_index * rsize) - .template as_static() - ->next; - auto curr_ptr = start_ptr; - do - { - b.add( - // Here begins our treatment of the heap as containing Wild pointers - freelist::Object::make( - capptr_to_user_address_control(curr_ptr.as_void())), - key, - entropy); - curr_ptr = curr_ptr->next; - } while (curr_ptr != start_ptr); -#else - auto p = bumpptr; - do + // Pick entry into space, and then build linked list by traversing cycle + // to the start. Use ->next to jump from Chunk to Alloc. + auto start_index = entropy.sample(count); + auto start_ptr = pointer_offset(bumpptr, start_index * rsize) + .template as_static() + ->next; + auto curr_ptr = start_ptr; + do + { + b.add( + // Here begins our treatment of the heap as containing Wild pointers + freelist::Object::make( + capptr_to_user_address_control(curr_ptr.as_void())), + key, + entropy); + curr_ptr = curr_ptr->next; + } while (curr_ptr != start_ptr); + } + else { - b.add( - // Here begins our treatment of the heap as containing Wild pointers - freelist::Object::make( - capptr_to_user_address_control( - Aal::capptr_bound( - p.as_void(), rsize))), - key); - p = pointer_offset(p, rsize); - } while (p < slab_end); -#endif + auto p = bumpptr; + do + { + b.add( + // Here begins our treatment of the heap as containing Wild pointers + freelist::Object::make( + capptr_to_user_address_control( + Aal::capptr_bound( + p.as_void(), rsize))), + key, + entropy); + p = pointer_offset(p, rsize); + } while (p < slab_end); + } // This code consumes everything up to slab_end. bumpptr = slab_end; } @@ -300,53 +276,49 @@ namespace snmalloc auto local_state = backend_state_ptr(); auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(local_state, p); + return capptr_domesticate(local_state, p); }; capptr::Alloc p = finish_alloc_no_zero(fl.take(key, domesticate), sizeclass); -#ifdef SNMALLOC_CHECK_CLIENT - // Check free list is well-formed on platforms with - // integers as pointers. - size_t count = 1; // Already taken one above. - while (!fl.empty()) - { - fl.take(key, domesticate); - count++; - } - // Check the list contains all the elements - SNMALLOC_CHECK( - (count + more) == snmalloc::sizeclass_to_slab_object_count(sizeclass)); - - if (more > 0) + // If clear_meta is requested, we should also walk the free list to clear + // it. + // TODO: we could optimise the clear_meta case to not walk the free list + // and instead just clear the whole slab, but that requires amplification. 
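// Editorial sketch (not part of the patch): the shape of the teardown check
// performed in clear_slab below, over a plain singly linked list.  Node,
// drain_and_count and objects_per_slab are illustrative stand-ins for
// snmalloc's freelist machinery; the real walk also verifies the signed
// back-edges and, under clear_meta, zeroes each entry as it goes.
#include <cassert>
#include <cstddef>

struct Node
{
  Node* next;
};

// Walks the list and returns how many entries it held.
inline size_t drain_and_count(Node* head)
{
  size_t count = 0;
  for (Node* curr = head; curr != nullptr; curr = curr->next)
    count++;
  return count;
}

// A slab that is genuinely unused must hand back exactly the number of
// objects it was carved into; anything else indicates corruption.
inline void check_slab_fully_free(Node* head, size_t objects_per_slab)
{
  assert(drain_and_count(head) == objects_per_slab);
}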
+ if constexpr ( + mitigations(freelist_teardown_validate) || mitigations(clear_meta)) { - auto no_more = meta->free_queue.close(fl, key); - SNMALLOC_ASSERT(no_more == 0); - UNUSED(no_more); - + // Check free list is well-formed on platforms with + // integers as pointers. + size_t count = 1; // Already taken one above. while (!fl.empty()) { fl.take(key, domesticate); count++; } + // Check the list contains all the elements + SNMALLOC_CHECK( + (count + more) == + snmalloc::sizeclass_to_slab_object_count(sizeclass)); + + if (more > 0) + { + auto no_more = meta->free_queue.close(fl, key); + SNMALLOC_ASSERT(no_more == 0); + UNUSED(no_more); + + while (!fl.empty()) + { + fl.take(key, domesticate); + count++; + } + } + SNMALLOC_CHECK( + count == snmalloc::sizeclass_to_slab_object_count(sizeclass)); } - SNMALLOC_CHECK( - count == snmalloc::sizeclass_to_slab_object_count(sizeclass)); -#endif - // TODO: This is a capability amplification as we are saying we - // have the whole chunk. auto start_of_slab = pointer_align_down( p, snmalloc::sizeclass_to_slab_size(sizeclass)); -#if defined(__CHERI_PURE_CAPABILITY__) && !defined(SNMALLOC_CHECK_CLIENT) - // Zero the whole slab. For CHERI we at least need to clear the freelist - // pointers to avoid leaking capabilities but we do not need to do it in - // the freelist order as for SNMALLOC_CHECK_CLIENT. Zeroing the whole slab - // may be more friendly to hw because it does not involve pointer chasing - // and is amenable to prefetching. - // FIXME: This should be a back-end method guarded on a feature flag. -#endif - #ifdef SNMALLOC_TRACING message<1024>( "Slab {} is unused, Object sizeclass {}", @@ -360,10 +332,10 @@ namespace snmalloc SNMALLOC_SLOW_PATH void dealloc_local_slabs(smallsizeclass_t sizeclass) { // Return unused slabs of sizeclass_t back to global allocator - alloc_classes[sizeclass].available.filter([this, sizeclass](auto* meta) { + alloc_classes[sizeclass].available.iterate([this, sizeclass](auto* meta) { auto domesticate = [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - auto res = capptr_domesticate(backend_state_ptr(), p); + auto res = capptr_domesticate(backend_state_ptr(), p); #ifdef SNMALLOC_TRACING if (res.unsafe_ptr() != p.unsafe_ptr()) printf( @@ -378,23 +350,25 @@ namespace snmalloc { meta->free_queue.validate(entropy.get_free_list_key(), domesticate); } - return false; + return; } alloc_classes[sizeclass].length--; alloc_classes[sizeclass].unused--; + // Remove from the list. This must be done before dealloc chunk + // as that may corrupt the node. + meta->node.remove(); + // TODO delay the clear to the next user of the slab, or teardown so // don't touch the cache lines at this point in snmalloc_check_client. auto start = clear_slab(meta, sizeclass); - Backend::dealloc_chunk( + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, start, sizeclass_to_slab_size(sizeclass)); - - return true; }); } @@ -423,7 +397,11 @@ namespace snmalloc UNUSED(size); #endif - Backend::dealloc_chunk(get_backend_local_state(), *meta, p, size); + // Remove from set of fully used slabs. + meta->node.remove(); + + Config::Backend::dealloc_chunk( + get_backend_local_state(), *meta, p, size); return; } @@ -438,6 +416,9 @@ namespace snmalloc // Wake slab up. meta->set_not_sleeping(sizeclass); + // Remove from set of fully used slabs. 
+ meta->node.remove(); + alloc_classes[sizeclass].available.insert(meta); alloc_classes[sizeclass].length++; @@ -469,7 +450,19 @@ namespace snmalloc */ SNMALLOC_FAST_PATH bool has_messages() { - return !(message_queue().is_empty()); + auto domesticate = [local_state = backend_state_ptr()]( + freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + if constexpr (Config::Options.QueueHeadsAreTame) + { + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); + } + else + { + return capptr_domesticate(local_state, p); + } + }; + + return !(message_queue().can_dequeue(domesticate)); } /** @@ -483,7 +476,7 @@ namespace snmalloc auto local_state = backend_state_ptr(); auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(local_state, p); + return capptr_domesticate(local_state, p); }; auto cb = [this, &need_post](freelist::HeadPtr msg) SNMALLOC_FAST_PATH_LAMBDA { @@ -492,28 +485,28 @@ namespace snmalloc #endif auto& entry = - Backend::Pagemap::template get_metaentry(snmalloc::address_cast(msg)); + Config::Backend::template get_metaentry(snmalloc::address_cast(msg)); handle_dealloc_remote(entry, msg.as_void(), need_post); return true; }; - if constexpr (Backend::Options.QueueHeadsAreTame) + if constexpr (Config::Options.QueueHeadsAreTame) { /* * The front of the queue has already been validated; just change the * annotating type. */ - auto domesticate_first = [](freelist::QueuePtr p) - SNMALLOC_FAST_PATH_LAMBDA { - return freelist::HeadPtr(p.unsafe_ptr()); - }; - message_queue().dequeue(key_global, domesticate_first, domesticate, cb); + auto domesticate_first = + [](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); + }; + message_queue().dequeue(domesticate_first, domesticate, cb); } else { - message_queue().dequeue(key_global, domesticate, domesticate, cb); + message_queue().dequeue(domesticate, domesticate, cb); } if (need_post) @@ -555,7 +548,7 @@ namespace snmalloc need_post = true; attached_cache->remote_dealloc_cache .template dealloc( - entry.get_remote()->trunc_id(), p.as_void(), key_global); + entry.get_remote()->trunc_id(), p.as_void()); } } @@ -571,12 +564,12 @@ namespace snmalloc // Entropy must be first, so that all data-structures can use the key // it generates. // This must occur before any freelists are constructed. - entropy.init(); + entropy.init(); // Ignoring stats for now. // stats().start(); - if constexpr (Backend::Options.IsQueueInline) + if constexpr (Config::Options.IsQueueInline) { init_message_queue(); message_queue().invariant(); @@ -606,8 +599,8 @@ namespace snmalloc * SFINAE disabled if the allocator does not own the local state. */ template< - typename Config = Backend, - typename = std::enable_if_t> + typename Config_ = Config, + typename = std::enable_if_t> CoreAllocator(LocalCache* cache) : attached_cache(cache) { init(); @@ -618,8 +611,8 @@ namespace snmalloc * state. SFINAE disabled if the allocator does own the local state. */ template< - typename Config = Backend, - typename = std::enable_if_t> + typename Config_ = Config, + typename = std::enable_if_t> CoreAllocator(LocalCache* cache, LocalState* backend = nullptr) : backend_state(backend), attached_cache(cache) { @@ -630,7 +623,7 @@ namespace snmalloc * If the message queue is not inline, provide it. This will then * configure the message queue for use. 
*/ - template + template std::enable_if_t init_message_queue(RemoteAllocator* q) { remote_alloc = q; @@ -649,8 +642,8 @@ namespace snmalloc // stats().remote_post(); // TODO queue not in line! bool sent_something = attached_cache->remote_dealloc_cache - .post( - backend_state_ptr(), public_state()->trunc_id(), key_global); + .post( + backend_state_ptr(), public_state()->trunc_id()); return sent_something; } @@ -674,7 +667,7 @@ namespace snmalloc // PagemapEntry-s seen here are expected to have meaningful Remote // pointers auto& entry = - Backend::Pagemap::template get_metaentry(snmalloc::address_cast(p)); + Config::Backend::template get_metaentry(snmalloc::address_cast(p)); if (SNMALLOC_LIKELY(dealloc_local_object_fast(entry, p, entropy))) return; @@ -691,6 +684,7 @@ namespace snmalloc SNMALLOC_ASSERT(!meta->is_unused()); snmalloc_check_client( + mitigations(sanity_checks), is_start_of_object(entry.get_sizeclass(), address_cast(p)), "Not deallocating start of an object"); @@ -712,22 +706,18 @@ namespace snmalloc auto& sl = alloc_classes[sizeclass].available; if (SNMALLOC_LIKELY(alloc_classes[sizeclass].length > 0)) { -#ifdef SNMALLOC_CHECK_CLIENT - // Occassionally don't use the last list. - if (SNMALLOC_UNLIKELY(alloc_classes[sizeclass].length == 1)) + if constexpr (mitigations(random_extra_slab)) { - // If the slab has a lot of free space, then we shouldn't allocate a - // new slab. - auto min = alloc_classes[sizeclass] - .available.peek() - ->free_queue.min_list_length(); - if ((min * 2) < threshold_for_waking_slab(sizeclass)) + // Occassionally don't use the last list. + if (SNMALLOC_UNLIKELY(alloc_classes[sizeclass].length == 1)) + { if (entropy.next_bit() == 0) return small_alloc_slow(sizeclass, fast_free_list); + } } -#endif - auto meta = sl.pop(); + // Mitigations use LIFO to increase time to reuse. + auto meta = sl.template pop(); // Drop length of sl, and empty count if it was empty. alloc_classes[sizeclass].length--; if (meta->needed() == 0) @@ -735,7 +725,7 @@ namespace snmalloc auto domesticate = [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(backend_state_ptr(), p); + return capptr_domesticate(backend_state_ptr(), p); }; auto [p, still_active] = BackendSlabMetadata::alloc_free_list( domesticate, meta, fast_free_list, entropy, sizeclass); @@ -745,8 +735,12 @@ namespace snmalloc alloc_classes[sizeclass].length++; sl.insert(meta); } + else + { + laden.insert(meta); + } - auto r = finish_alloc(p, sizeclass); + auto r = finish_alloc(p, sizeclass); return ticker.check_tick(r); } return small_alloc_slow(sizeclass, fast_free_list); @@ -759,7 +753,7 @@ namespace snmalloc SNMALLOC_FAST_PATH LocalState& get_backend_local_state() { - if constexpr (Backend::Options.CoreAllocOwnsLocalState) + if constexpr (Config::Options.CoreAllocOwnsLocalState) { return backend_state; } @@ -783,7 +777,7 @@ namespace snmalloc message<1024>("small_alloc_slow rsize={} slab size={}", rsize, slab_size); #endif - auto [slab, meta] = Backend::alloc_chunk( + auto [slab, meta] = Config::Backend::alloc_chunk( get_backend_local_state(), slab_size, PagemapEntry::encode( @@ -795,14 +789,15 @@ namespace snmalloc } // Set meta slab to empty. 
- meta->initialise(sizeclass); + meta->initialise( + sizeclass, address_cast(slab), entropy.get_free_list_key()); // Build a free list for the slab alloc_new_list(slab, meta, rsize, slab_size, entropy); auto domesticate = [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(backend_state_ptr(), p); + return capptr_domesticate(backend_state_ptr(), p); }; auto [p, still_active] = BackendSlabMetadata::alloc_free_list( domesticate, meta, fast_free_list, entropy, sizeclass); @@ -812,8 +807,12 @@ namespace snmalloc alloc_classes[sizeclass].length++; alloc_classes[sizeclass].available.insert(meta); } + else + { + laden.insert(meta); + } - auto r = finish_alloc(p, sizeclass); + auto r = finish_alloc(p, sizeclass); return ticker.check_tick(r); } @@ -828,7 +827,7 @@ namespace snmalloc auto local_state = backend_state_ptr(); auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(local_state, p); + return capptr_domesticate(local_state, p); }; if (destroy_queue) @@ -839,9 +838,10 @@ namespace snmalloc while (p_tame != nullptr) { bool need_post = true; // Always going to post, so ignore. - auto n_tame = p_tame->atomic_read_next(key_global, domesticate); + auto n_tame = + p_tame->atomic_read_next(RemoteAllocator::key_global, domesticate); const PagemapEntry& entry = - Backend::Pagemap::get_metaentry(snmalloc::address_cast(p_tame)); + Config::Backend::get_metaentry(snmalloc::address_cast(p_tame)); handle_dealloc_remote(entry, p_tame.as_void(), need_post); p_tame = n_tame; } @@ -854,7 +854,7 @@ namespace snmalloc handle_message_queue([]() {}); } - auto posted = attached_cache->flush( + auto posted = attached_cache->flush( backend_state_ptr(), [&](capptr::Alloc p) { dealloc_local_object(p); }); @@ -865,6 +865,14 @@ namespace snmalloc dealloc_local_slabs(sizeclass); } + laden.iterate([this, domesticate]( + BackendSlabMetadata* meta) SNMALLOC_FAST_PATH_LAMBDA { + if (!meta->is_large()) + { + meta->free_queue.validate(entropy.get_free_list_key(), domesticate); + } + }); + return posted; } @@ -893,16 +901,33 @@ namespace snmalloc */ bool debug_is_empty_impl(bool* result) { - auto test = [&result](auto& queue) { - queue.filter([&result](auto slab_metadata) { + auto& key = entropy.get_free_list_key(); + + auto error = [&result, &key](auto slab_metadata) { + auto slab_interior = slab_metadata->get_slab_interior(key); + const PagemapEntry& entry = + Config::Backend::get_metaentry(slab_interior); + SNMALLOC_ASSERT(slab_metadata == entry.get_slab_metadata()); + auto size_class = entry.get_sizeclass(); + auto slab_size = sizeclass_full_to_slab_size(size_class); + auto slab_start = bits::align_down(slab_interior, slab_size); + + if (result != nullptr) + *result = false; + else + report_fatal_error( + "debug_is_empty: found non-empty allocator: size={} on " + "slab_start {}", + sizeclass_full_to_size(size_class), + slab_start); + }; + + auto test = [&error](auto& queue) { + queue.iterate([&error](auto slab_metadata) { if (slab_metadata->needed() != 0) { - if (result != nullptr) - *result = false; - else - error("debug_is_empty: found non-empty allocator"); + error(slab_metadata); } - return false; }); }; @@ -913,6 +938,11 @@ namespace snmalloc test(alloc_class.available); } + if (!laden.is_empty()) + { + error(laden.peek()); + } + // Place the static stub message on the queue. init_message_queue(); @@ -961,6 +991,6 @@ namespace snmalloc /** * Use this alias to access the pool of allocators throughout snmalloc. 
*/ - template - using AllocPool = Pool, Backend, Backend::pool>; + template + using AllocPool = Pool, Config, Config::pool>; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h index 1b590942f324..2e63b68bfa00 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/entropy.h @@ -1,33 +1,13 @@ #pragma once +#include "../ds/ds.h" #include "../pal/pal.h" #include #include -#ifndef SNMALLOC_PLATFORM_HAS_GETENTROPY -# include -#endif namespace snmalloc { - template - std::enable_if_t, uint64_t> get_entropy64() - { - return PAL::get_entropy64(); - } - - template - std::enable_if_t, uint64_t> get_entropy64() - { -#ifdef SNMALLOC_PLATFORM_HAS_GETENTROPY - return DefaultPal::get_entropy64(); -#else - std::random_device rd; - uint64_t a = rd(); - return (a << 32) ^ rd(); -#endif - } - struct FreeListKey { address_t key1; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h index 4eecfa96f173..250719766e43 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/external_alloc.h @@ -45,6 +45,11 @@ namespace snmalloc::external_alloc { using ::malloc_usable_size; } +# elif defined(__DragonFly__) +namespace snmalloc::external_alloc +{ + using ::malloc_usable_size; +} # else # error Define malloc size macro for this platform. # endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h index 3c70307f4742..49348d1d8a93 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/freelist.h @@ -57,8 +57,7 @@ namespace snmalloc { public: template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue = - capptr::bounds::AllocWild> + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> class T; /** @@ -67,13 +66,13 @@ namespace snmalloc * place. Give it a shorter name (Object::BQueuePtr) for * convenience. */ - template + template using BQueuePtr = CapPtr, BQueue>; /** * As with BQueuePtr, but atomic. */ - template + template using BAtomicQueuePtr = AtomicCapPtr, BQueue>; /** @@ -83,16 +82,16 @@ namespace snmalloc * looks a little nicer than "CapPtr, BView>". */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> using BHeadPtr = CapPtr, BView>; /** * As with BHeadPtr, but atomic. */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> using BAtomicHeadPtr = AtomicCapPtr, BView>; /** @@ -112,34 +111,64 @@ namespace snmalloc * require size to be threaded through, but would provide more OOB * detection. 
*/ - template + template class T { template< bool, - bool, - SNMALLOC_CONCEPT(capptr::ConceptBound), - SNMALLOC_CONCEPT(capptr::ConceptBound)> + SNMALLOC_CONCEPT(capptr::IsBound), + SNMALLOC_CONCEPT(capptr::IsBound)> friend class Builder; friend class Object; + class Empty + { + public: + void check_prev(address_t) {} + + void set_prev(address_t) {} + }; + + class Prev + { + address_t prev_encoded; + + public: + /** + * Check the signature of this free Object + */ + void check_prev(address_t signed_prev) + { + snmalloc_check_client( + mitigations(freelist_backward_edge), + signed_prev == prev_encoded, + "Heap corruption - free list corrupted!"); + UNUSED(signed_prev); + } + + void set_prev(address_t signed_prev) + { + prev_encoded = signed_prev; + } + }; + union { BQueuePtr next_object; // TODO: Should really use C++20 atomic_ref rather than a union. BAtomicQueuePtr atomic_next_object; }; -#ifdef SNMALLOC_CHECK_CLIENT - // Encoded representation of a back pointer. - // Hard to fake, and provides consistency on - // the next pointers. - address_t prev_encoded; -#endif + + SNMALLOC_NO_UNIQUE_ADDRESS + std::conditional_t + prev{}; public: + constexpr T() : next_object(){}; + template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView = typename BQueue:: + SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: template with_wildness, typename Domesticator> BHeadPtr @@ -150,13 +179,15 @@ namespace snmalloc this->atomic_next_object.load(std::memory_order_acquire), key); auto n_tame = domesticate(n_wild); -#ifdef SNMALLOC_CHECK_CLIENT - if (n_tame != nullptr) + if constexpr (mitigations(freelist_backward_edge)) { - n_tame->check_prev( - signed_prev(address_cast(this), address_cast(n_tame), key)); + if (n_tame != nullptr) + { + n_tame->prev.check_prev( + signed_prev(address_cast(this), address_cast(n_tame), key)); + } } -#endif + Aal::prefetch(&(n_tame->next_object)); return n_tame; } @@ -164,7 +195,7 @@ namespace snmalloc * Read the next pointer */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView = typename BQueue:: + SNMALLOC_CONCEPT(capptr::IsBound) BView = typename BQueue:: template with_wildness, typename Domesticator> BHeadPtr @@ -179,32 +210,29 @@ namespace snmalloc */ void check_prev(address_t signed_prev) { - UNUSED(signed_prev); - snmalloc_check_client( - signed_prev == this->prev_encoded, - "Heap corruption - free list corrupted!"); + prev.check_prev(signed_prev); } /** - * Clean up this object when removing it from the list. This is - * important on CHERI to avoid leaking capabilities. On CHECK_CLIENT - * builds it might increase the difficulty to bypass the checks. + * Clean up this object when removing it from the list. */ void cleanup() { -#if defined(__CHERI_PURE_CAPABILITY__) || defined(SNMALLOC_CHECK_CLIENT) - this->next_object = nullptr; -# ifdef SNMALLOC_CHECK_CLIENT - this->prev_encoded = 0; -# endif -#endif + if constexpr (mitigations(clear_meta)) + { + this->next_object = nullptr; + if constexpr (mitigations(freelist_backward_edge)) + { + this->prev.set_prev(0); + } + } } }; // Note the inverted template argument order, since BView is inferable. 
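// Editorial sketch (not part of the patch): why make() below lists the
// queue-bound parameter first.  With the deducible parameter last, callers
// spell only the bound that cannot be inferred and the view bound comes from
// the argument's type.  Wrap, rebind, Tame and Wild are illustrative
// stand-ins for CapPtr and its bound annotations.
template<typename Bounds, typename T>
struct Wrap
{
  T* ptr;
};

struct Tame {};
struct Wild {};

// QueueBound must be named explicitly; ViewBound and T are deduced.
template<typename QueueBound, typename ViewBound, typename T>
Wrap<QueueBound, T> rebind(Wrap<ViewBound, T> p)
{
  return {p.ptr};
}

inline void rebind_example()
{
  int x = 0;
  Wrap<Tame, int> p{&x};
  auto q = rebind<Wild>(p); // QueueBound spelled, ViewBound = Tame inferred
  (void)q;
}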
template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue, - SNMALLOC_CONCEPT(capptr::ConceptBound) BView> + SNMALLOC_CONCEPT(capptr::IsBound) BQueue, + SNMALLOC_CONCEPT(capptr::IsBound) BView> static BHeadPtr make(CapPtr p) { return p.template as_static>(); @@ -213,7 +241,7 @@ namespace snmalloc /** * A container-of operation to convert &f->next_object to f */ - template + template static Object::T* from_next_ptr(CapPtr, BQueue>* ptr) { @@ -221,11 +249,10 @@ namespace snmalloc return reinterpret_cast*>(ptr); } - private: /** * Involutive encryption with raw pointers */ - template + template inline static Object::T* code_next(address_t curr, Object::T* next, const FreeListKey& key) { @@ -236,7 +263,8 @@ namespace snmalloc // Curr is not used in the current encoding scheme. UNUSED(curr); - if constexpr (CHECK_CLIENT && !aal_supports) + if constexpr ( + mitigations(freelist_forward_edge) && !aal_supports) { return unsafe_from_uintptr>( unsafe_to_uintptr>(next) ^ key.key_next); @@ -248,7 +276,6 @@ namespace snmalloc } } - public: /** * Encode next. We perform two convenient little bits of type-level * sleight of hand here: @@ -265,12 +292,13 @@ namespace snmalloc * is likely (but not necessarily) Wild. */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> inline static BQueuePtr encode_next( address_t curr, BHeadPtr next, const FreeListKey& key) { - return BQueuePtr(code_next(curr, next.unsafe_ptr(), key)); + return BQueuePtr::unsafe_from( + code_next(curr, next.unsafe_ptr(), key)); } /** @@ -289,17 +317,18 @@ namespace snmalloc * encapsulated within Object and we do not capture any of it statically. */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> inline static BHeadPtr decode_next( address_t curr, BHeadPtr next, const FreeListKey& key) { - return BHeadPtr(code_next(curr, next.unsafe_ptr(), key)); + return BHeadPtr::unsafe_from( + code_next(curr, next.unsafe_ptr(), key)); } template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> static void assert_view_queue_bounds() { static_assert( @@ -324,8 +353,8 @@ namespace snmalloc * next->next_object is nullptr). */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> static BQueuePtr* store_next( BQueuePtr* curr, BHeadPtr next, @@ -333,17 +362,19 @@ namespace snmalloc { assert_view_queue_bounds(); -#ifdef SNMALLOC_CHECK_CLIENT - next->prev_encoded = - signed_prev(address_cast(curr), address_cast(next), key); -#else - UNUSED(key); -#endif + if constexpr (mitigations(freelist_backward_edge)) + { + next->prev.set_prev( + signed_prev(address_cast(curr), address_cast(next), key)); + } + else + UNUSED(key); + *curr = encode_next(address_cast(curr), next, key); return &(next->next_object); } - template + template static void store_null(BQueuePtr* curr, const FreeListKey& key) { *curr = @@ -356,8 +387,8 @@ namespace snmalloc * Uses the atomic view of next, so can be used in the message queues. 
*/ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> static void atomic_store_next( BHeadPtr curr, BHeadPtr next, @@ -365,12 +396,14 @@ namespace snmalloc { static_assert(BView::wildness == capptr::dimension::Wildness::Tame); -#ifdef SNMALLOC_CHECK_CLIENT - next->prev_encoded = - signed_prev(address_cast(curr), address_cast(next), key); -#else - UNUSED(key); -#endif + if constexpr (mitigations(freelist_backward_edge)) + { + next->prev.set_prev( + signed_prev(address_cast(curr), address_cast(next), key)); + } + else + UNUSED(key); + // Signature needs to be visible before item is linked in // so requires release semantics. curr->atomic_next_object.store( @@ -379,8 +412,8 @@ namespace snmalloc } template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue> + SNMALLOC_CONCEPT(capptr::IsBound) BView, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue> static void atomic_store_null(BHeadPtr curr, const FreeListKey& key) { @@ -420,28 +453,55 @@ namespace snmalloc */ using AtomicQueuePtr = Object::BAtomicQueuePtr; + class Prev + { + address_t prev{0}; + + protected: + constexpr Prev(address_t prev) : prev(prev) {} + constexpr Prev() = default; + + address_t replace(address_t next) + { + auto p = prev; + prev = next; + return p; + } + }; + + class NoPrev + { + protected: + constexpr NoPrev(address_t){}; + constexpr NoPrev() = default; + + address_t replace(address_t t) + { + // This should never be called. + SNMALLOC_CHECK(false); + return t; + } + }; + + using IterBase = + std::conditional_t; + /** * Used to iterate a free list in object space. * * Checks signing of pointers */ template< - SNMALLOC_CONCEPT(capptr::ConceptBound) BView = capptr::bounds::Alloc, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue = capptr::bounds::AllocWild> - class Iter + SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> + class Iter : IterBase { Object::BHeadPtr curr{nullptr}; -#ifdef SNMALLOC_CHECK_CLIENT - address_t prev{0}; -#endif public: constexpr Iter(Object::BHeadPtr head, address_t prev_value) - : curr(head) + : IterBase(prev_value), curr(head) { -#ifdef SNMALLOC_CHECK_CLIENT - prev = prev_value; -#endif UNUSED(prev_value); } @@ -475,12 +535,16 @@ namespace snmalloc Aal::prefetch(next.unsafe_ptr()); curr = next; -#ifdef SNMALLOC_CHECK_CLIENT - c->check_prev(prev); - prev = signed_prev(address_cast(c), address_cast(next), key); -#else - UNUSED(key); -#endif + + if constexpr (mitigations(freelist_backward_edge)) + { + auto p = + replace(signed_prev(address_cast(c), address_cast(next), key)); + c->check_prev(p); + } + else + UNUSED(key); + c->cleanup(); return c; } @@ -505,9 +569,8 @@ namespace snmalloc */ template< bool RANDOM, - bool INIT = true, - SNMALLOC_CONCEPT(capptr::ConceptBound) BView = capptr::bounds::Alloc, - SNMALLOC_CONCEPT(capptr::ConceptBound) BQueue = capptr::bounds::AllocWild> + SNMALLOC_CONCEPT(capptr::IsBound) BView = capptr::bounds::Alloc, + SNMALLOC_CONCEPT(capptr::IsBound) BQueue = capptr::bounds::AllocWild> class Builder { static constexpr size_t LENGTH = RANDOM ? 2 : 1; @@ -531,7 +594,7 @@ namespace snmalloc // This enables branch free enqueuing. 
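// Editorial sketch (not part of the patch): the branch-free enqueue that the
// end[] pointers below make possible, shown for a plain intrusive list.
// Because end always points at the slot holding the list's terminal
// next-pointer (initially &head), push() needs no "is the list empty?"
// branch.  IntrusiveNode and TailQueue are illustrative names.
struct IntrusiveNode
{
  IntrusiveNode* next = nullptr;
};

class TailQueue
{
  IntrusiveNode* head = nullptr;
  IntrusiveNode** end = &head; // slot the next push writes through

public:
  void push(IntrusiveNode* n)
  {
    n->next = nullptr;
    *end = n;       // same store whether the queue is empty or not
    end = &n->next; // the new node now holds the terminal next-pointer
  }

  IntrusiveNode* take_all()
  {
    IntrusiveNode* first = head;
    head = nullptr;
    end = &head;
    return first;
  }
};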
std::array end{nullptr}; - Object::BQueuePtr* cast_end(uint32_t ix) + [[nodiscard]] Object::BQueuePtr* cast_end(uint32_t ix) const { return reinterpret_cast*>(end[ix]); } @@ -541,22 +604,16 @@ namespace snmalloc end[ix] = reinterpret_cast(p); } - Object::BHeadPtr cast_head(uint32_t ix) + [[nodiscard]] Object::BHeadPtr cast_head(uint32_t ix) const { - return Object::BHeadPtr( + return Object::BHeadPtr::unsafe_from( static_cast*>(head[ix])); } - std::array length{}; + SNMALLOC_NO_UNIQUE_ADDRESS std::array length{}; public: - constexpr Builder() - { - if (INIT) - { - init(); - } - } + constexpr Builder() = default; /** * Checks if the builder contains any elements. @@ -628,8 +685,8 @@ namespace snmalloc * and is thus subject to encoding if the next_object pointers * encoded. */ - Object::BHeadPtr - read_head(uint32_t index, const FreeListKey& key) + [[nodiscard]] Object::BHeadPtr + read_head(uint32_t index, const FreeListKey& key) const { return Object::decode_next( address_cast(&head[index]), cast_head(index), key); @@ -687,15 +744,25 @@ namespace snmalloc /** * Set the builder to a not building state. */ - constexpr void init() + constexpr void init(address_t slab, const FreeListKey& key) { for (size_t i = 0; i < LENGTH; i++) { end[i] = &head[i]; - if (RANDOM) + if constexpr (RANDOM) { length[i] = 0; } + + // Head is not live when a building is initialised. + // We use this slot to store a pointer into the slab for the + // allocations. This then establishes the invariant that head is + // always (a possibly encoded) pointer into the slab, and thus + // the Freelist builder always knows which block it is referring too. + head[i] = Object::code_next( + address_cast(&head[i]), + useless_ptr_from_addr>(slab), + key); } } @@ -715,9 +782,9 @@ namespace snmalloc // this is doing a CONTAINING_RECORD like cast to get back // to the actual object. This isn't true if the builder is // empty, but you are not allowed to call this in the empty case. 
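// Editorial sketch (not part of the patch): the CONTAINING_RECORD-style cast
// mentioned above, in plain C++.  Given a pointer to an embedded next field,
// recover the enclosing object.  Elem and container_of_next are illustrative
// names; snmalloc's from_next_ptr can use a direct cast because next_object
// is the first member, so the offset arithmetic below degenerates to zero.
#include <cstddef>
#include <cstdint>

struct Elem
{
  Elem* next; // first member, as in freelist::Object::T
  int payload;
};

inline Elem* container_of_next(Elem** next_field)
{
  return reinterpret_cast<Elem*>(
    reinterpret_cast<uintptr_t>(next_field) - offsetof(Elem, next));
}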
- auto last = - Object::BHeadPtr(Object::from_next_ptr(cast_end(0))); - init(); + auto last = Object::BHeadPtr::unsafe_from( + Object::from_next_ptr(cast_end(0))); + init(address_cast(head[0]), key); return {first, last}; } @@ -725,48 +792,37 @@ namespace snmalloc SNMALLOC_FAST_PATH void validate(const FreeListKey& key, Domesticator domesticate) { -#ifdef SNMALLOC_CHECK_CLIENT - for (uint32_t i = 0; i < LENGTH; i++) + if constexpr (mitigations(freelist_teardown_validate)) { - if (&head[i] == end[i]) - { - SNMALLOC_CHECK(length[i] == 0); - continue; - } - - size_t count = 1; - auto curr = read_head(i, key); - auto prev = get_fake_signed_prev(i, key); - while (true) + for (uint32_t i = 0; i < LENGTH; i++) { - curr->check_prev(prev); - if (address_cast(&(curr->next_object)) == address_cast(end[i])) - break; - count++; - auto next = curr->read_next(key, domesticate); - prev = signed_prev(address_cast(curr), address_cast(next), key); - curr = next; + if (&head[i] == end[i]) + { + SNMALLOC_CHECK(!RANDOM || (length[i] == 0)); + continue; + } + + size_t count = 1; + auto curr = read_head(i, key); + auto prev = get_fake_signed_prev(i, key); + while (true) + { + curr->check_prev(prev); + if (address_cast(&(curr->next_object)) == address_cast(end[i])) + break; + count++; + auto next = curr->read_next(key, domesticate); + prev = signed_prev(address_cast(curr), address_cast(next), key); + curr = next; + } + SNMALLOC_CHECK(!RANDOM || (count == length[i])); } - SNMALLOC_CHECK(count == length[i]); } -#else - UNUSED(key); - UNUSED(domesticate); -#endif - } - - /** - * Returns length of the shorter free list. - * - * This method is only usable if the free list is adding randomisation - * as that is when it has two lists. - */ - template - [[nodiscard]] std::enable_if_t min_list_length() const - { - static_assert(RANDOM_ == RANDOM, "Don't set SFINAE parameter!"); - - return length[0] < length[1] ? length[0] : length[1]; + else + { + UNUSED(key); + UNUSED(domesticate); + } } }; } // namespace freelist diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/globalalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/globalalloc.h index b898eed9f12f..dc9528f66ada 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/globalalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/globalalloc.h @@ -5,18 +5,18 @@ namespace snmalloc { - template + template inline static void cleanup_unused() { #ifndef SNMALLOC_PASS_THROUGH static_assert( - SharedStateHandle::Options.CoreAllocIsPoolAllocated, + Config::Options.CoreAllocIsPoolAllocated, "Global cleanup is available only for pool-allocated configurations"); // Call this periodically to free and coalesce memory allocated by // allocators that are not currently in use by any thread. // One atomic operation to extract the stack, another to restore it. // Handling the message queue for each stack is non-atomic. - auto* first = AllocPool::extract(); + auto* first = AllocPool::extract(); auto* alloc = first; decltype(alloc) last; @@ -26,10 +26,10 @@ namespace snmalloc { alloc->flush(); last = alloc; - alloc = AllocPool::extract(alloc); + alloc = AllocPool::extract(alloc); } - AllocPool::restore(first, last); + AllocPool::restore(first, last); } #endif } @@ -39,16 +39,16 @@ namespace snmalloc allocators are empty. If you don't pass a pointer to a bool, then will raise an error all the allocators are not empty. 
*/ - template + template inline static void debug_check_empty(bool* result = nullptr) { #ifndef SNMALLOC_PASS_THROUGH static_assert( - SharedStateHandle::Options.CoreAllocIsPoolAllocated, + Config::Options.CoreAllocIsPoolAllocated, "Global status is available only for pool-allocated configurations"); // This is a debugging function. It checks that all memory from all // allocators has been freed. - auto* alloc = AllocPool::iterate(); + auto* alloc = AllocPool::iterate(); # ifdef SNMALLOC_TRACING message<1024>("debug check empty: first {}", alloc); @@ -62,7 +62,7 @@ namespace snmalloc message<1024>("debug_check_empty: Check all allocators!"); # endif done = true; - alloc = AllocPool::iterate(); + alloc = AllocPool::iterate(); okay = true; while (alloc != nullptr) @@ -83,7 +83,7 @@ namespace snmalloc # ifdef SNMALLOC_TRACING message<1024>("debug check empty: okay = {}", okay); # endif - alloc = AllocPool::iterate(alloc); + alloc = AllocPool::iterate(alloc); } } @@ -96,11 +96,11 @@ namespace snmalloc // Redo check so abort is on allocator with allocation left. if (!okay) { - alloc = AllocPool::iterate(); + alloc = AllocPool::iterate(); while (alloc != nullptr) { alloc->debug_is_empty(nullptr); - alloc = AllocPool::iterate(alloc); + alloc = AllocPool::iterate(alloc); } } #else @@ -108,13 +108,13 @@ namespace snmalloc #endif } - template + template inline static void debug_in_use(size_t count) { static_assert( - SharedStateHandle::Options.CoreAllocIsPoolAllocated, + Config::Options.CoreAllocIsPoolAllocated, "Global status is available only for pool-allocated configurations"); - auto alloc = AllocPool::iterate(); + auto alloc = AllocPool::iterate(); while (alloc != nullptr) { if (alloc->debug_is_in_use()) @@ -125,7 +125,7 @@ namespace snmalloc } count--; } - alloc = AllocPool::iterate(alloc); + alloc = AllocPool::iterate(alloc); if (count != 0) { diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h index 086fd8a01617..c85d30b2b8f5 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/localalloc.h @@ -1,7 +1,9 @@ #pragma once -#ifdef _MSC_VER -# define ALLOCATOR __declspec(allocator) +#if defined(_MSC_VER) +# define ALLOCATOR __declspec(allocator) __declspec(restrict) +#elif __has_attribute(malloc) +# define ALLOCATOR __attribute__((malloc)) #else # define ALLOCATOR #endif @@ -56,11 +58,11 @@ namespace snmalloc * core allocator must be provided externally by invoking the `init` method * on this class *before* any allocation-related methods are called. */ - template + template class LocalAllocator { public: - using StateHandle = Backend; + using Config = Config_; private: /** @@ -68,15 +70,15 @@ namespace snmalloc * specialised for the back-end that we are using. * @{ */ - using CoreAlloc = CoreAllocator; - using PagemapEntry = typename Backend::Pagemap::Entry; + using CoreAlloc = CoreAllocator; + using PagemapEntry = typename Config::PagemapEntry; /// }@ // Free list per small size class. These are used for // allocation on the fast path. This part of the code is inspired by // mimalloc. // Also contains remote deallocation cache. - LocalCache local_cache{&Backend::unused_remote}; + LocalCache local_cache{&Config::unused_remote}; // Underlying allocator for most non-fast path operations. CoreAlloc* core_alloc{nullptr}; @@ -120,7 +122,7 @@ namespace snmalloc SNMALLOC_SLOW_PATH decltype(auto) lazy_init(Action action, Args... 
args) { SNMALLOC_ASSERT(core_alloc == nullptr); - if constexpr (!Backend::Options.LocalAllocSupportsLazyInit) + if constexpr (!Config::Options.LocalAllocSupportsLazyInit) { SNMALLOC_CHECK( false && @@ -133,7 +135,7 @@ namespace snmalloc else { // Initialise the thread local allocator - if constexpr (Backend::Options.CoreAllocOwnsLocalState) + if constexpr (Config::Options.CoreAllocOwnsLocalState) { init(); } @@ -145,7 +147,7 @@ namespace snmalloc // Must be called at least once per thread. // A pthread implementation only calls the thread destruction handle // if the key has been set. - Backend::register_clean_up(); + Config::register_clean_up(); // Perform underlying operation auto r = action(core_alloc, args...); @@ -184,7 +186,7 @@ namespace snmalloc return check_init([&](CoreAlloc* core_alloc) { // Grab slab of correct size // Set remote as large allocator remote. - auto [chunk, meta] = Backend::alloc_chunk( + auto [chunk, meta] = Config::Backend::alloc_chunk( core_alloc->get_backend_local_state(), large_size_to_chunk_size(size), PagemapEntry::encode( @@ -197,11 +199,15 @@ namespace snmalloc // Initialise meta data for a successful large allocation. if (meta != nullptr) - meta->initialise_large(); + { + meta->initialise_large( + address_cast(chunk), local_cache.entropy.get_free_list_key()); + core_alloc->laden.insert(meta); + } if (zero_mem == YesZero && chunk.unsafe_ptr() != nullptr) { - Backend::Pal::template zero( + Config::Pal::template zero( chunk.unsafe_ptr(), bits::next_pow2(size)); } @@ -212,10 +218,10 @@ namespace snmalloc template SNMALLOC_FAST_PATH capptr::Alloc small_alloc(size_t size) { - auto domesticate = [this]( - freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(core_alloc->backend_state_ptr(), p); - }; + auto domesticate = + [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(core_alloc->backend_state_ptr(), p); + }; auto slowpath = [&]( smallsizeclass_t sizeclass, freelist::Iter<>* fl) SNMALLOC_FAST_PATH_LAMBDA { @@ -239,7 +245,7 @@ namespace snmalloc sizeclass); }; - return local_cache.template alloc( + return local_cache.template alloc( domesticate, size, slowpath); } @@ -271,9 +277,9 @@ namespace snmalloc alloc_size(p.unsafe_ptr())); #endif const PagemapEntry& entry = - Backend::Pagemap::get_metaentry(address_cast(p)); + Config::Backend::template get_metaentry(address_cast(p)); local_cache.remote_dealloc_cache.template dealloc( - entry.get_remote()->trunc_id(), p, key_global); + entry.get_remote()->trunc_id(), p); post_remote_cache(); return; } @@ -300,13 +306,13 @@ namespace snmalloc } /** - * Call `Backend::is_initialised()` if it is implemented, + * Call `Config::is_initialised()` if it is implemented, * unconditionally returns true otherwise. */ SNMALLOC_FAST_PATH bool is_initialised() { - return call_is_initialised(nullptr, 0); + return call_is_initialised(nullptr, 0); } /** @@ -329,13 +335,13 @@ namespace snmalloc {} /** - * Call `Backend::ensure_init()` if it is implemented, do + * Call `Config::ensure_init()` if it is implemented, do * nothing otherwise. */ SNMALLOC_FAST_PATH void ensure_init() { - call_ensure_init(nullptr, 0); + call_ensure_init(nullptr, 0); } public: @@ -380,7 +386,7 @@ namespace snmalloc // Initialise the global allocator structures ensure_init(); // Grab an allocator for this thread. 
- init(AllocPool::acquire(&(this->local_cache))); + init(AllocPool::acquire(&(this->local_cache))); } // Return all state in the fast allocator and release the underlying @@ -400,9 +406,9 @@ namespace snmalloc // Detach underlying allocator core_alloc->attached_cache = nullptr; // Return underlying allocator to the system. - if constexpr (Backend::Options.CoreAllocOwnsLocalState) + if constexpr (Config::Options.CoreAllocOwnsLocalState) { - AllocPool::release(core_alloc); + AllocPool::release(core_alloc); } // Set up thread local allocator to look like @@ -411,7 +417,7 @@ namespace snmalloc #ifdef SNMALLOC_TRACING message<1024>("flush(): core_alloc={}", core_alloc); #endif - local_cache.remote_allocator = &Backend::unused_remote; + local_cache.remote_allocator = &Config::unused_remote; local_cache.remote_dealloc_cache.capacity = 0; } } @@ -461,9 +467,9 @@ namespace snmalloc * point where we know, from the pagemap, or by explicitly testing, that the * pointer under test is not nullptr. */ -#if defined(__CHERI_PURE_CAPABILITY__) && defined(SNMALLOC_CHECK_CLIENT) - SNMALLOC_SLOW_PATH void dealloc_cheri_checks(void* p) + SNMALLOC_FAST_PATH void dealloc_cheri_checks(void* p) { +#if defined(__CHERI_PURE_CAPABILITY__) /* * Enforce the use of an unsealed capability. * @@ -471,7 +477,9 @@ namespace snmalloc * elide this test in that world. */ snmalloc_check_client( - !__builtin_cheri_sealed_get(p), "Sealed capability in deallocation"); + mitigations(cheri_checks), + !__builtin_cheri_sealed_get(p), + "Sealed capability in deallocation"); /* * Enforce permissions on the returned pointer. These pointers end up in @@ -490,6 +498,7 @@ namespace snmalloc static const size_t reqperm = CHERI_PERM_LOAD | CHERI_PERM_STORE | CHERI_PERM_LOAD_CAP | CHERI_PERM_STORE_CAP; snmalloc_check_client( + mitigations(cheri_checks), (__builtin_cheri_perms_get(p) & reqperm) == reqperm, "Insufficient permissions on capability in deallocation"); @@ -504,13 +513,16 @@ namespace snmalloc * elide this test. */ snmalloc_check_client( - __builtin_cheri_tag_get(p), "Untagged capability in deallocation"); + mitigations(cheri_checks), + __builtin_cheri_tag_get(p), + "Untagged capability in deallocation"); /* * Verify that the capability is not zero-length, ruling out the other * edge case around monotonicity. */ snmalloc_check_client( + mitigations(cheri_checks), __builtin_cheri_length_get(p) > 0, "Zero-length capability in deallocation"); @@ -579,8 +591,10 @@ namespace snmalloc * acceptable security posture for the allocator and between clients; * misbehavior is confined to the misbehaving client. */ - } +#else + UNUSED(p); #endif + } SNMALLOC_FAST_PATH void dealloc(void* p_raw) { @@ -625,15 +639,15 @@ namespace snmalloc * deal with the object's extent. 
*/ capptr::Alloc p_tame = - capptr_domesticate(core_alloc->backend_state_ptr(), p_wild); + capptr_domesticate(core_alloc->backend_state_ptr(), p_wild); const PagemapEntry& entry = - Backend::Pagemap::get_metaentry(address_cast(p_tame)); + Config::Backend::get_metaentry(address_cast(p_tame)); + if (SNMALLOC_LIKELY(local_cache.remote_allocator == entry.get_remote())) { -# if defined(__CHERI_PURE_CAPABILITY__) && defined(SNMALLOC_CHECK_CLIENT) dealloc_cheri_checks(p_tame.unsafe_ptr()); -# endif + if (SNMALLOC_LIKELY(CoreAlloc::dealloc_local_object_fast( entry, p_tame, local_cache.entropy))) return; @@ -644,14 +658,19 @@ namespace snmalloc RemoteAllocator* remote = entry.get_remote(); if (SNMALLOC_LIKELY(remote != nullptr)) { -# if defined(__CHERI_PURE_CAPABILITY__) && defined(SNMALLOC_CHECK_CLIENT) dealloc_cheri_checks(p_tame.unsafe_ptr()); -# endif + + // Detect double free of large allocations here. + snmalloc_check_client( + mitigations(sanity_checks), + !entry.is_backend_owned(), + "Memory corruption detected"); + // Check if we have space for the remote deallocation if (local_cache.remote_dealloc_cache.reserve_space(entry)) { local_cache.remote_dealloc_cache.template dealloc( - remote->trunc_id(), p_tame, key_global); + remote->trunc_id(), p_tame); # ifdef SNMALLOC_TRACING message<1024>( "Remote dealloc fast {} ({})", p_raw, alloc_size(p_raw)); @@ -666,7 +685,10 @@ namespace snmalloc // If p_tame is not null, then dealloc has been call on something // it shouldn't be called on. // TODO: Should this be tested even in the !CHECK_CLIENT case? - snmalloc_check_client(p_tame == nullptr, "Not allocated by snmalloc."); + snmalloc_check_client( + mitigations(sanity_checks), + p_tame == nullptr, + "Not allocated by snmalloc."); # ifdef SNMALLOC_TRACING message<1024>("nullptr deallocation"); @@ -675,16 +697,41 @@ namespace snmalloc #endif } + void check_size(void* p, size_t size) + { +#ifdef SNMALLOC_PASS_THROUGH + UNUSED(p, size); +#else + if constexpr (mitigations(sanity_checks)) + { + size = size == 0 ? 1 : size; + auto sc = size_to_sizeclass_full(size); + auto pm_sc = + Config::Backend::get_metaentry(address_cast(p)).get_sizeclass(); + auto rsize = sizeclass_full_to_size(sc); + auto pm_size = sizeclass_full_to_size(pm_sc); + snmalloc_check_client( + mitigations(sanity_checks), + sc == pm_sc, + "Dealloc rounded size mismatch: {} != {}", + rsize, + pm_size); + } + else + UNUSED(p, size); +#endif + } + SNMALLOC_FAST_PATH void dealloc(void* p, size_t s) { - UNUSED(s); + check_size(p, s); dealloc(p); } template SNMALLOC_FAST_PATH void dealloc(void* p) { - UNUSED(size); + check_size(p, size); dealloc(p); } @@ -707,7 +754,7 @@ namespace snmalloc #else // TODO What's the domestication policy here? At the moment we just // probe the pagemap with the raw address, without checks. There could - // be implicit domestication through the `Backend::Pagemap` or + // be implicit domestication through the `Config::Pagemap` or // we could just leave well enough alone. // Note that alloc_size should return 0 for nullptr. @@ -718,7 +765,7 @@ namespace snmalloc // entry for the first chunk of memory, that states it represents a // large object, so we can pull the check for null off the fast path. 
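The deallocation path above performs one pagemap lookup and then branches on ownership: an object owned by the current allocator takes the local fast path, anything else is batched per owner and posted to that owner's message queue. A rough standalone sketch of that dispatch, with illustrative names rather than snmalloc's API:

#include <cstddef>
#include <unordered_map>
#include <vector>

struct Owner; // stands in for a remote allocator's identity

struct RemoteBatch
{
  std::unordered_map<const Owner*, std::vector<void*>> pending;
  std::size_t count = 0;
  std::size_t capacity = 64;

  // Returns true when the batch should be flushed to the owners.
  // (A real cache would also reset its budget after posting.)
  bool add(const Owner* target, void* p)
  {
    pending[target].push_back(p);
    return ++count >= capacity;
  }
};

template<typename LocalFree, typename Post>
void dealloc(
  void* p,
  const Owner* owner_of_p, // obtained from the metadata lookup
  const Owner* me,
  RemoteBatch& batch,
  LocalFree local_free,
  Post post)
{
  if (owner_of_p == me)
  {
    local_free(p); // fast path: the object belongs to this thread's allocator
    return;
  }
  if (batch.add(owner_of_p, p))
    post(batch); // flush the batched frees to their owners' queues
}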
const PagemapEntry& entry = - Backend::Pagemap::get_metaentry(address_cast(p_raw)); + Config::Backend::template get_metaentry(address_cast(p_raw)); return sizeclass_full_to_size(entry.get_sizeclass()); #endif @@ -734,22 +781,31 @@ namespace snmalloc template void* external_pointer(void* p) { - // Note that each case uses `pointer_offset`, so that on - // CHERI it is monotone with respect to the capability. - // Note that the returned pointer could be outside the CHERI - // bounds of `p`, and thus not something that can be followed. + /* + * Note that: + * * each case uses `pointer_offset`, so that on CHERI, our behaviour is + * monotone with respect to the capability `p`. + * + * * the returned pointer could be outside the CHERI bounds of `p`, and + * thus not something that can be followed. + * + * * we don't use capptr_from_client()/capptr_reveal(), to avoid the + * syntactic clutter. By inspection, `p` flows only to address_cast + * and pointer_offset, and so there's no risk that we follow or act + * to amplify the rights carried by `p`. + */ if constexpr (location == Start) { - size_t index = index_in_object(p); + size_t index = index_in_object(address_cast(p)); return pointer_offset(p, 0 - index); } else if constexpr (location == End) { - return pointer_offset(p, remaining_bytes(p) - 1); + return pointer_offset(p, remaining_bytes(address_cast(p)) - 1); } else { - return pointer_offset(p, remaining_bytes(p)); + return pointer_offset(p, remaining_bytes(address_cast(p))); } } @@ -759,24 +815,25 @@ namespace snmalloc * auto p = (char*)malloc(size) * remaining_bytes(p + n) == size - n provided n < size */ - size_t remaining_bytes(const void* p) + size_t remaining_bytes(address_t p) { #ifndef SNMALLOC_PASS_THROUGH const PagemapEntry& entry = - Backend::Pagemap::template get_metaentry(address_cast(p)); + Config::Backend::template get_metaentry(p); auto sizeclass = entry.get_sizeclass(); - return snmalloc::remaining_bytes(sizeclass, address_cast(p)); + return snmalloc::remaining_bytes(sizeclass, p); #else - return pointer_diff(p, reinterpret_cast(UINTPTR_MAX)); + return reinterpret_cast( + std::numeric_limits::max() - p); #endif } bool check_bounds(const void* p, size_t s) { - if (SNMALLOC_LIKELY(Backend::Pagemap::is_initialised())) + if (SNMALLOC_LIKELY(Config::is_initialised())) { - return remaining_bytes(p) >= s; + return remaining_bytes(address_cast(p)) >= s; } return true; } @@ -787,14 +844,14 @@ namespace snmalloc * auto p = (char*)malloc(size) * index_in_object(p + n) == n provided n < size */ - size_t index_in_object(const void* p) + size_t index_in_object(address_t p) { #ifndef SNMALLOC_PASS_THROUGH const PagemapEntry& entry = - Backend::Pagemap::template get_metaentry(address_cast(p)); + Config::Backend::template get_metaentry(p); auto sizeclass = entry.get_sizeclass(); - return snmalloc::index_in_object(sizeclass, address_cast(p)); + return snmalloc::index_in_object(sizeclass, p); #else return reinterpret_cast(p); #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h index 2ae8ffd682d4..cfbbaa576f2f 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/localcache.h @@ -19,15 +19,14 @@ namespace snmalloc return p.as_void(); } - template + template inline static SNMALLOC_FAST_PATH capptr::Alloc finish_alloc(freelist::HeadPtr p, smallsizeclass_t sizeclass) { auto r = finish_alloc_no_zero(p, sizeclass); if constexpr (zero_mem == YesZero) - 
SharedStateHandle::Pal::zero( - r.unsafe_ptr(), sizeclass_to_size(sizeclass)); + Config::Pal::zero(r.unsafe_ptr(), sizeclass_to_size(sizeclass)); // TODO: Should this be zeroing the free Object state, in the non-zeroing // case? @@ -64,18 +63,14 @@ namespace snmalloc /** * Return all the free lists to the allocator. Used during thread teardown. */ - template< - size_t allocator_size, - typename SharedStateHandle, - typename DeallocFun> - bool flush( - typename SharedStateHandle::LocalState* local_state, DeallocFun dealloc) + template + bool flush(typename Config::LocalState* local_state, DeallocFun dealloc) { auto& key = entropy.get_free_list_key(); - auto domesticate = - [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(local_state, p); - }; + auto domesticate = [local_state](freelist::QueuePtr p) + SNMALLOC_FAST_PATH_LAMBDA { + return capptr_domesticate(local_state, p); + }; for (size_t i = 0; i < NUM_SMALL_SIZECLASSES; i++) { @@ -90,13 +85,13 @@ namespace snmalloc } } - return remote_dealloc_cache.post( - local_state, remote_allocator->trunc_id(), key_global); + return remote_dealloc_cache.post( + local_state, remote_allocator->trunc_id()); } template< ZeroMem zero_mem, - typename SharedStateHandle, + typename Config, typename Slowpath, typename Domesticator> SNMALLOC_FAST_PATH capptr::Alloc @@ -108,7 +103,7 @@ namespace snmalloc if (SNMALLOC_LIKELY(!fl.empty())) { auto p = fl.take(key, domesticate); - return finish_alloc(p, sizeclass); + return finish_alloc(p, sizeclass); } return slowpath(sizeclass, &fl); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h index ca90ec1af62d..8b1314e2e17d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/metadata.h @@ -360,34 +360,44 @@ namespace snmalloc } }; + /** + * FrontendSlabMetadata_Trait + * + * Used for static checks of inheritance as FrontendSlabMetadata is templated. + */ + class FrontendSlabMetadata_Trait + { + private: + template + friend class FrontendSlabMetadata; + + // Can only be constructed by FrontendSlabMetadata + FrontendSlabMetadata_Trait() = default; + }; + /** * The FrontendSlabMetadata represent the metadata associated with a single * slab. */ - class alignas(CACHELINE_SIZE) FrontendSlabMetadata + template + class FrontendSlabMetadata : public FrontendSlabMetadata_Trait { public: /** * Used to link slab metadata together in various other data-structures. - * This is intended to be used with `SeqSet` and so may actually hold a - * subclass of this class provided by the back end. The `SeqSet` is - * responsible for maintaining that invariant. While an instance of this - * class is in a `SeqSet`, the `next` field should not be assigned to by - * anything that doesn't enforce the invariant that `next` stores a `T*`, - * where `T` is a subclass of `FrontendSlabMetadata`. + * This is used with `SeqSet` and so may actually hold a subclass of this + * class provided by the back end. The `SeqSet` is responsible for + * maintaining that invariant. */ - FrontendSlabMetadata* next{nullptr}; + typename SeqSet::Node node; constexpr FrontendSlabMetadata() = default; /** * Data-structure for building the free list for this slab. 
*/ -#ifdef SNMALLOC_CHECK_CLIENT - freelist::Builder free_queue; -#else - freelist::Builder free_queue; -#endif + SNMALLOC_NO_UNIQUE_ADDRESS freelist::Builder + free_queue; /** * The number of deallocation required until we hit a slow path. This @@ -427,9 +437,13 @@ /** * Initialise FrontendSlabMetadata for a slab. */ - void initialise(smallsizeclass_t sizeclass) + void initialise( + smallsizeclass_t sizeclass, address_t slab, const FreeListKey& key) { - free_queue.init(); + static_assert( + std::is_base_of::value, + "Template should be a subclass of FrontendSlabMetadata"); + free_queue.init(slab, key); // Set up meta data as if the entire slab has been turned into a free // list. This means we don't have to check for special cases where we have // returned all the elements, but this is a slab that is still being bump @@ -445,10 +459,10 @@ * * Set needed so immediately moves to slow path. */ - void initialise_large() + void initialise_large(address_t slab, const FreeListKey& key) { // We will push to this just to make the fast path clean. - free_queue.init(); + free_queue.init(slab, key); // Flag to detect that it is a large alloc on the slow path large_ = true; @@ -549,11 +563,10 @@ auto p = tmp_fl.take(key, domesticate); fast_free_list = tmp_fl; -#ifdef SNMALLOC_CHECK_CLIENT - entropy.refresh_bits(); -#else - UNUSED(entropy); -#endif + if constexpr (mitigations(random_preserve)) + entropy.refresh_bits(); + else + UNUSED(entropy); // This marks the slab as sleeping, and sets a wakeup // when sufficient deallocations have occurred to this slab. @@ -563,6 +576,13 @@ return {p, !sleeping}; } + + // Returns a pointer to somewhere in the slab. May not be the + // start of the slab. + [[nodiscard]] address_t get_slab_interior(const FreeListKey& key) const + { + return address_cast(free_queue.read_head(0, key)); + } }; /** @@ -576,11 +596,13 @@ * Ensure that the template parameter is valid. */ static_assert( - std::is_convertible_v, + std::is_convertible_v, "The front end requires that the back end provides slab metadata that is " "compatible with the front-end's structure"); public: + using SlabMetadata = BackendSlabMetadata; + constexpr FrontendMetaEntry() = default; /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h index 119777a7ca93..36737207db2d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/pool.h @@ -22,14 +22,18 @@ namespace snmalloc { template< typename TT, - SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle, + SNMALLOC_CONCEPT(IsConfig) Config, PoolState& get_state()> friend class Pool; private: - MPMCStack stack; + // Queue of elements not currently in use. + // Must hold lock to modify + capptr::Alloc front{nullptr}; + capptr::Alloc back{nullptr}; + FlagWord lock{}; - T* list{nullptr}; + capptr::Alloc list{nullptr}; public: constexpr PoolState() = default; @@ -41,9 +45,7 @@ namespace snmalloc * SingletonPoolState::pool is the default provider for the PoolState within * the Pool class.
*/ - template< - typename T, - SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle> + template class SingletonPoolState { /** @@ -55,8 +57,8 @@ namespace snmalloc -> decltype(SharedStateHandle_::ensure_init()) { static_assert( - std::is_same::value, - "SFINAE parameter, should only be used with SharedStateHandle"); + std::is_same::value, + "SFINAE parameter, should only be used with Config"); SharedStateHandle_::ensure_init(); } @@ -68,17 +70,17 @@ namespace snmalloc SNMALLOC_FAST_PATH static auto call_ensure_init(SharedStateHandle_*, long) { static_assert( - std::is_same::value, - "SFINAE parameter, should only be used with SharedStateHandle"); + std::is_same::value, + "SFINAE parameter, should only be used with Config"); } /** - * Call `SharedStateHandle::ensure_init()` if it is implemented, do nothing + * Call `Config::ensure_init()` if it is implemented, do nothing * otherwise. */ SNMALLOC_FAST_PATH static void ensure_init() { - call_ensure_init(nullptr, 0); + call_ensure_init(nullptr, 0); } static void make_pool(PoolState*) noexcept @@ -114,8 +116,8 @@ namespace snmalloc */ template< typename T, - SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle, - PoolState& get_state() = SingletonPoolState::pool> + SNMALLOC_CONCEPT(IsConfig) Config, + PoolState& get_state() = SingletonPoolState::pool> class Pool { public: @@ -123,31 +125,39 @@ namespace snmalloc static T* acquire(Args&&... args) { PoolState& pool = get_state(); - T* p = pool.stack.pop(); - - if (p != nullptr) { - p->set_in_use(); - return p; + FlagLock f(pool.lock); + if (pool.front != nullptr) + { + auto p = pool.front; + auto next = p->next; + if (next == nullptr) + { + pool.back = nullptr; + } + pool.front = next; + p->set_in_use(); + return p.unsafe_ptr(); + } } auto raw = - SharedStateHandle::template alloc_meta_data(nullptr, sizeof(T)); + Config::Backend::template alloc_meta_data(nullptr, sizeof(T)); if (raw == nullptr) { - SharedStateHandle::Pal::error( - "Failed to initialise thread local allocator."); + Config::Pal::error("Failed to initialise thread local allocator."); } - p = new (raw.unsafe_ptr()) T(std::forward(args)...); + auto p = capptr::Alloc::unsafe_from(new (raw.unsafe_ptr()) + T(std::forward(args)...)); FlagLock f(pool.lock); p->list_next = pool.list; pool.list = p; p->set_in_use(); - return p; + return p.unsafe_ptr(); } /** @@ -161,16 +171,23 @@ namespace snmalloc // is returned without the constructor being run, so the object is reused // without re-initialisation. p->reset_in_use(); - get_state().stack.push(p); + restore(p, p); } static T* extract(T* p = nullptr) { + PoolState& pool = get_state(); // Returns a linked list of all objects in the stack, emptying the stack. if (p == nullptr) - return get_state().stack.pop_all(); + { + FlagLock f(pool.lock); + auto result = pool.front; + pool.front = nullptr; + pool.back = nullptr; + return result.unsafe_ptr(); + } - return p->next; + return p->next.unsafe_ptr(); } /** @@ -180,17 +197,88 @@ namespace snmalloc */ static void restore(T* first, T* last) { - // Pushes a linked list of objects onto the stack. Use to put a linked - // list returned by extract back onto the stack. 
- get_state().stack.push(first, last); + PoolState& pool = get_state(); + last->next = nullptr; + FlagLock f(pool.lock); + + if (pool.front == nullptr) + { + pool.front = capptr::Alloc::unsafe_from(first); + } + else + { + pool.back->next = capptr::Alloc::unsafe_from(first); + } + + pool.back = capptr::Alloc::unsafe_from(last); + } + + /** + * Return to the pool a list of object previously retrieved by `extract` + * + * Do not return objects from `acquire`. + */ + static void restore_front(T* first, T* last) + { + PoolState& pool = get_state(); + last->next = nullptr; + FlagLock f(pool.lock); + + if (pool.front == nullptr) + { + pool.back = capptr::Alloc::unsafe_from(last); + } + else + { + last->next = pool.front; + pool.back->next = capptr::Alloc::unsafe_from(first); + } + pool.front = capptr::Alloc::unsafe_from(first); } static T* iterate(T* p = nullptr) { if (p == nullptr) - return get_state().list; + return get_state().list.unsafe_ptr(); + + return p->list_next.unsafe_ptr(); + } + + /** + * Put the stack in a consistent order. This is helpful for systematic + * testing based systems. It is not thread safe, and the caller should + * ensure no other thread can be performing a `sort` concurrently with this + * call. + */ + static void sort() + { + // Marker is used to signify free elements. + auto marker = capptr::Alloc::unsafe_from(reinterpret_cast(1)); - return p->list_next; + // Extract all the elements and mark them as free. + T* curr = extract(); + T* prev = nullptr; + while (curr != nullptr) + { + prev = curr; + curr = extract(curr); + // Assignment must occur after extract, otherwise extract would read the + // marker + prev->next = marker; + } + + // Build a list of the free elements in the correct order. + // This is the opposite order to the list of all elements + // so that iterate works correctly. + curr = iterate(); + while (curr != nullptr) + { + if (curr->next == marker) + { + restore_front(curr, curr); + } + curr = iterate(curr); + } } }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h index ac1af5d1ca9b..a812bc924cb1 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/pooled.h @@ -14,35 +14,33 @@ namespace snmalloc public: template< typename TT, - SNMALLOC_CONCEPT(ConceptBackendGlobals) SharedStateHandle, + SNMALLOC_CONCEPT(IsConfig) Config, PoolState& get_state()> friend class Pool; - template - friend class MPMCStack; /// Used by the pool for chaining together entries when not in use. - std::atomic next{nullptr}; + capptr::Alloc next{nullptr}; /// Used by the pool to keep the list of all entries ever created. 
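The Pool rework above replaces the lock-free MPMCStack with a FIFO of idle allocators guarded by a flag lock, alongside a second intrusive list of every object ever created so that iterate and sort can reach them all. The queue discipline can be sketched in isolation as follows; plain new and a std::atomic_flag spin lock stand in for snmalloc's capability pointers and FlagLock:

#include <atomic>

struct Entry
{
  Entry* next = nullptr;      // link in the idle FIFO
  Entry* list_next = nullptr; // link in the list of all objects ever created
};

class SpinLock
{
  std::atomic_flag flag = ATOMIC_FLAG_INIT;

public:
  void lock()
  {
    while (flag.test_and_set(std::memory_order_acquire))
    {
    }
  }

  void unlock()
  {
    flag.clear(std::memory_order_release);
  }
};

class Pool
{
  SpinLock lock;
  Entry* front = nullptr;
  Entry* back = nullptr;
  Entry* all = nullptr;

public:
  // Reuse an idle entry if one exists, otherwise create one and record it
  // on the list of all objects.
  Entry* acquire()
  {
    lock.lock();
    if (Entry* e = front)
    {
      front = e->next;
      if (front == nullptr)
        back = nullptr;
      lock.unlock();
      return e;
    }
    lock.unlock();

    Entry* fresh = new Entry();
    lock.lock();
    fresh->list_next = all;
    all = fresh;
    lock.unlock();
    return fresh;
  }

  // Append to the back of the idle FIFO.
  void release(Entry* e)
  {
    e->next = nullptr;
    lock.lock();
    if (back == nullptr)
      front = e;
    else
      back->next = e;
    back = e;
    lock.unlock();
  }
};

Keeping the idle objects in FIFO order, and being able to sort them, gives more deterministic reuse than a stack, which is what the comment on sort above is after.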
- T* list_next; - std::atomic_flag in_use = ATOMIC_FLAG_INIT; + capptr::Alloc list_next; + std::atomic in_use{false}; public: void set_in_use() { - if (in_use.test_and_set()) + if (in_use.exchange(true)) error("Critical error: double use of Pooled Type!"); } void reset_in_use() { - in_use.clear(); + in_use.store(false); } bool debug_is_in_use() { - bool result = in_use.test_and_set(); + bool result = in_use.exchange(true); if (!result) - in_use.clear(); + in_use.store(false); return result; } }; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h index 2b92e9f6e2f5..2d15e6d1119a 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/remoteallocator.h @@ -10,11 +10,6 @@ namespace snmalloc { - /** - * Global key for all remote lists. - */ - inline static FreeListKey key_global(0xdeadbeef, 0xbeefdead, 0xdeadbeef); - /** * * A RemoteAllocator is the message queue of freed objects. It exposes a MPSC @@ -44,6 +39,16 @@ namespace snmalloc */ struct alignas(REMOTE_MIN_ALIGN) RemoteAllocator { + /** + * Global key for all remote lists. + * + * Note that we use a single key for all remote free lists and queues. + * This is so that we do not have to recode next pointers when sending + * segments, and look up specific keys based on destination. This is + * potentially more performant, but could make it easier to guess the key. + */ + inline static FreeListKey key_global{0xdeadbeef, 0xbeefdead, 0xdeadbeef}; + using alloc_id_t = address_t; // Store the message queue on a separate cacheline. It is mutable data that @@ -51,36 +56,42 @@ namespace snmalloc alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr back{nullptr}; // Store the two ends on different cache lines as access by different // threads. 
- alignas(CACHELINE_SIZE) freelist::QueuePtr front{nullptr}; + alignas(CACHELINE_SIZE) freelist::AtomicQueuePtr front{nullptr}; + // Fake first entry + freelist::Object::T stub{}; constexpr RemoteAllocator() = default; void invariant() { - SNMALLOC_ASSERT(back != nullptr); + SNMALLOC_ASSERT( + (address_cast(front.load()) == address_cast(&stub)) || + (back != nullptr)); } - void init(freelist::HeadPtr stub) + void init() { - freelist::Object::atomic_store_null(stub, key_global); - front = capptr_rewild(stub); - back.store(front, std::memory_order_relaxed); + freelist::HeadPtr stub_ptr = freelist::HeadPtr::unsafe_from(&stub); + freelist::Object::atomic_store_null(stub_ptr, key_global); + front.store(freelist::QueuePtr::unsafe_from(&stub)); + back.store(nullptr, std::memory_order_relaxed); invariant(); } freelist::QueuePtr destroy() { - freelist::QueuePtr fnt = front; + freelist::QueuePtr fnt = front.load(); back.store(nullptr, std::memory_order_relaxed); - front = nullptr; + if (address_cast(front.load()) == address_cast(&stub)) + return nullptr; return fnt; } - inline bool is_empty() + template + inline bool can_dequeue(Domesticator_head domesticate_head) { - freelist::QueuePtr bk = back.load(std::memory_order_relaxed); - - return bk == front; + return domesticate_head(front.load()) + ->atomic_read_next(key_global, domesticate_head) == nullptr; } /** @@ -94,11 +105,10 @@ namespace snmalloc void enqueue( freelist::HeadPtr first, freelist::HeadPtr last, - const FreeListKey& key, Domesticator_head domesticate_head) { invariant(); - freelist::Object::atomic_store_null(last, key); + freelist::Object::atomic_store_null(last, key_global); // Exchange needs to be acq_rel. // * It needs to be a release, so nullptr in next is visible. @@ -107,12 +117,14 @@ namespace snmalloc freelist::QueuePtr prev = back.exchange(capptr_rewild(last), std::memory_order_acq_rel); - freelist::Object::atomic_store_next(domesticate_head(prev), first, key); - } + if (SNMALLOC_LIKELY(prev != nullptr)) + { + freelist::Object::atomic_store_next( + domesticate_head(prev), first, key_global); + return; + } - freelist::QueuePtr peek() - { - return front; + front.store(capptr_rewild(first)); } /** @@ -128,21 +140,21 @@ namespace snmalloc typename Domesticator_queue, typename Cb> void dequeue( - const FreeListKey& key, Domesticator_head domesticate_head, Domesticator_queue domesticate_queue, Cb cb) { invariant(); - SNMALLOC_ASSERT(front != nullptr); + SNMALLOC_ASSERT(front.load() != nullptr); // Use back to bound, so we don't handle new entries. auto b = back.load(std::memory_order_relaxed); - freelist::HeadPtr curr = domesticate_head(front); + freelist::HeadPtr curr = domesticate_head(front.load()); while (address_cast(curr) != address_cast(b)) { - freelist::HeadPtr next = curr->atomic_read_next(key, domesticate_queue); + freelist::HeadPtr next = + curr->atomic_read_next(key_global, domesticate_queue); // We have observed a non-linearisable effect of the queue. // Just go back to allocating normally. 
if (SNMALLOC_UNLIKELY(next == nullptr)) diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h index 01a275257e4f..96f5e09732a6 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/remotecache.h @@ -17,7 +17,7 @@ namespace snmalloc */ struct RemoteDeallocCache { - std::array, REMOTE_SLOTS> list; + std::array, REMOTE_SLOTS> list; /** * The total amount of memory we are waiting for before we will dispatch @@ -66,29 +66,29 @@ namespace snmalloc } template - SNMALLOC_FAST_PATH void dealloc( - RemoteAllocator::alloc_id_t target_id, - capptr::Alloc p, - const FreeListKey& key) + SNMALLOC_FAST_PATH void + dealloc(RemoteAllocator::alloc_id_t target_id, capptr::Alloc p) { SNMALLOC_ASSERT(initialised); auto r = p.template as_reinterpret>(); - list[get_slot(target_id, 0)].add(r, key); + list[get_slot(target_id, 0)].add( + r, RemoteAllocator::key_global); } - template + template bool post( - typename Backend::LocalState* local_state, - RemoteAllocator::alloc_id_t id, - const FreeListKey& key) + typename Config::LocalState* local_state, RemoteAllocator::alloc_id_t id) { + // Use same key as the remote allocator, so segments can be + // posted to a remote allocator without reencoding. + const auto& key = RemoteAllocator::key_global; SNMALLOC_ASSERT(initialised); size_t post_round = 0; bool sent_something = false; auto domesticate = [local_state](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { - return capptr_domesticate(local_state, p); + return capptr_domesticate(local_state, p); }; while (true) @@ -104,24 +104,25 @@ namespace snmalloc { auto [first, last] = list[i].extract_segment(key); const auto& entry = - Backend::Pagemap::get_metaentry(address_cast(first)); + Config::Backend::get_metaentry(address_cast(first)); auto remote = entry.get_remote(); // If the allocator is not correctly aligned, then the bit that is // set implies this is used by the backend, and we should not be // deallocating memory here. snmalloc_check_client( + mitigations(sanity_checks), !entry.is_backend_owned(), "Delayed detection of attempt to free internal structure."); - if constexpr (Backend::Options.QueueHeadsAreTame) + if constexpr (Config::Options.QueueHeadsAreTame) { auto domesticate_nop = [](freelist::QueuePtr p) { - return freelist::HeadPtr(p.unsafe_ptr()); + return freelist::HeadPtr::unsafe_from(p.unsafe_ptr()); }; - remote->enqueue(first, last, key, domesticate_nop); + remote->enqueue(first, last, domesticate_nop); } else { - remote->enqueue(first, last, key, domesticate); + remote->enqueue(first, last, domesticate); } sent_something = true; } @@ -143,7 +144,7 @@ namespace snmalloc // Use the next N bits to spread out remote deallocs in our own // slot. auto r = resend.take(key, domesticate); - const auto& entry = Backend::Pagemap::get_metaentry(address_cast(r)); + const auto& entry = Config::Backend::get_metaentry(address_cast(r)); auto i = entry.get_remote()->trunc_id(); size_t slot = get_slot(i, post_round); list[slot].add(r, key); @@ -172,7 +173,9 @@ namespace snmalloc #endif for (auto& l : list) { - l.init(); + // We do not need to initialise with a particular slab, so pass + // a null address. 
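The RemoteAllocator and RemoteDeallocCache changes above keep the classic multi-producer single-consumer message-queue shape: producers only touch back (one exchange plus one pointer store), the single consumer drains from front, and a stub element keeps both ends non-null. A bare-bones sketch of that structure, without snmalloc's key-based pointer encoding or domestication:

#include <atomic>

struct Node
{
  std::atomic<Node*> next{nullptr};
};

class MpscQueue
{
  Node stub;                      // fake first entry, so the ends are never null
  std::atomic<Node*> back{&stub}; // shared between producers
  Node* front{&stub};             // owned by the single consumer

public:
  // Producer side: publish `n` by swinging `back`, then link the previous
  // tail to it. Between the exchange and the store the chain is briefly
  // broken, which is why the consumer stops on a null `next`.
  void enqueue(Node* n)
  {
    n->next.store(nullptr, std::memory_order_relaxed);
    Node* prev = back.exchange(n, std::memory_order_acq_rel);
    prev->next.store(n, std::memory_order_release);
  }

  // Consumer side: process everything linked in before this call, leaving
  // the node currently at `back` (a producer may still be writing its
  // `next` pointer) for a later drain.
  template<typename Cb>
  void drain(Cb cb)
  {
    Node* stop = back.load(std::memory_order_relaxed);
    Node* curr = front;
    while (curr != stop)
    {
      Node* next = curr->next.load(std::memory_order_acquire);
      if (next == nullptr)
        break; // a producer has not finished linking yet; retry later
      if (curr != &stub)
        cb(curr);
      curr = next;
    }
    front = curr;
  }
};

The key_global and domesticate parameters in the real code layer pointer obfuscation and capability checks on top of exactly this structure.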
+ l.init(0, RemoteAllocator::key_global); } capacity = REMOTE_CACHE; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h b/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h index b257d0775370..2037443223d2 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/mem/sizeclasstable.h @@ -18,7 +18,7 @@ namespace snmalloc using smallsizeclass_t = size_t; using chunksizeclass_t = size_t; - constexpr static inline smallsizeclass_t size_to_sizeclass_const(size_t size) + static constexpr smallsizeclass_t size_to_sizeclass_const(size_t size) { // Don't use sizeclasses that are not a multiple of the alignment. // For example, 24 byte allocations can be @@ -31,22 +31,22 @@ namespace snmalloc return sc; } - static constexpr size_t NUM_SMALL_SIZECLASSES = + constexpr size_t NUM_SMALL_SIZECLASSES = size_to_sizeclass_const(MAX_SMALL_SIZECLASS_SIZE); // Large classes range from [MAX_SMALL_SIZECLASS_SIZE, ADDRESS_SPACE). - static constexpr size_t NUM_LARGE_CLASSES = - Pal::address_bits - MAX_SMALL_SIZECLASS_BITS; + constexpr size_t NUM_LARGE_CLASSES = + DefaultPal::address_bits - MAX_SMALL_SIZECLASS_BITS; // How many bits are required to represent either a large or a small // sizeclass. - static constexpr size_t TAG_SIZECLASS_BITS = bits::max( + constexpr size_t TAG_SIZECLASS_BITS = bits::max( bits::next_pow2_bits_const(NUM_SMALL_SIZECLASSES + 1), bits::next_pow2_bits_const(NUM_LARGE_CLASSES + 1)); // Number of bits required to represent a tagged sizeclass that can be // either small or large. - static constexpr size_t SIZECLASS_REP_SIZE = + constexpr size_t SIZECLASS_REP_SIZE = bits::one_at_bit(TAG_SIZECLASS_BITS + 1); /** @@ -66,7 +66,7 @@ namespace snmalloc public: constexpr sizeclass_t() = default; - constexpr static sizeclass_t from_small_class(smallsizeclass_t sc) + static constexpr sizeclass_t from_small_class(smallsizeclass_t sc) { SNMALLOC_ASSERT(sc < TAG); // Note could use `+` or `|`. Using `+` as will combine nicely with array @@ -78,13 +78,13 @@ namespace snmalloc * Takes the number of leading zero bits from the actual large size-1. * See size_to_sizeclass_full */ - constexpr static sizeclass_t from_large_class(size_t large_class) + static constexpr sizeclass_t from_large_class(size_t large_class) { SNMALLOC_ASSERT(large_class < TAG); return {large_class}; } - constexpr static sizeclass_t from_raw(size_t raw) + static constexpr sizeclass_t from_raw(size_t raw) { return {raw}; } @@ -120,11 +120,16 @@ namespace snmalloc { return value == 0; } + + constexpr bool operator==(sizeclass_t other) + { + return value == other.value; + } }; using sizeclass_compress_t = uint8_t; - inline SNMALLOC_FAST_PATH static size_t + constexpr SNMALLOC_FAST_PATH static size_t aligned_size(size_t alignment, size_t size) { // Client responsible for checking alignment is not zero @@ -220,12 +225,9 @@ namespace snmalloc meta_slow.capacity = static_cast((meta.slab_mask + 1) / rsize); - meta_slow.waking = -#ifdef SNMALLOC_CHECK_CLIENT - static_cast(meta_slow.capacity / 4); -#else + meta_slow.waking = mitigations(random_larger_thresholds) ? 
+ static_cast(meta_slow.capacity / 4) : static_cast(bits::min((meta_slow.capacity / 4), 32)); -#endif if (meta_slow.capacity > max_capacity) { @@ -265,26 +267,26 @@ namespace snmalloc } }; - static inline constexpr SizeClassTable sizeclass_metadata = SizeClassTable(); + constexpr SizeClassTable sizeclass_metadata = SizeClassTable(); - static constexpr size_t DIV_MULT_SHIFT = sizeclass_metadata.DIV_MULT_SHIFT; + constexpr size_t DIV_MULT_SHIFT = sizeclass_metadata.DIV_MULT_SHIFT; - constexpr static inline size_t sizeclass_to_size(smallsizeclass_t sizeclass) + constexpr size_t sizeclass_to_size(smallsizeclass_t sizeclass) { return sizeclass_metadata.fast_small(sizeclass).size; } - static inline size_t sizeclass_full_to_size(sizeclass_t sizeclass) + constexpr size_t sizeclass_full_to_size(sizeclass_t sizeclass) { return sizeclass_metadata.fast(sizeclass).size; } - inline static size_t sizeclass_full_to_slab_size(sizeclass_t sizeclass) + constexpr size_t sizeclass_full_to_slab_size(sizeclass_t sizeclass) { return sizeclass_metadata.fast(sizeclass).slab_mask + 1; } - inline static size_t sizeclass_to_slab_size(smallsizeclass_t sizeclass) + constexpr size_t sizeclass_to_slab_size(smallsizeclass_t sizeclass) { return sizeclass_metadata.fast_small(sizeclass).slab_mask + 1; } @@ -296,7 +298,7 @@ namespace snmalloc * * It also increases entropy, when we have randomisation. */ - inline uint16_t threshold_for_waking_slab(smallsizeclass_t sizeclass) + constexpr uint16_t threshold_for_waking_slab(smallsizeclass_t sizeclass) { return sizeclass_metadata.slow(sizeclass_t::from_small_class(sizeclass)) .waking; @@ -309,7 +311,7 @@ namespace snmalloc return bits::next_pow2_bits(ssize) - MIN_CHUNK_BITS; } - inline static size_t slab_sizeclass_to_size(chunksizeclass_t sizeclass) + constexpr size_t slab_sizeclass_to_size(chunksizeclass_t sizeclass) { return bits::one_at_bit(MIN_CHUNK_BITS + sizeclass); } @@ -318,20 +320,19 @@ namespace snmalloc * For large allocations, the metaentry stores the raw log_2 of the size, * which must be shifted into the index space of slab_sizeclass-es. 
*/ - inline static size_t + constexpr size_t metaentry_chunk_sizeclass_to_slab_sizeclass(chunksizeclass_t sizeclass) { return sizeclass - MIN_CHUNK_BITS; } - inline constexpr static uint16_t - sizeclass_to_slab_object_count(smallsizeclass_t sizeclass) + constexpr uint16_t sizeclass_to_slab_object_count(smallsizeclass_t sizeclass) { return sizeclass_metadata.slow(sizeclass_t::from_small_class(sizeclass)) .capacity; } - inline static address_t start_of_object(sizeclass_t sc, address_t addr) + constexpr address_t start_of_object(sizeclass_t sc, address_t addr) { auto meta = sizeclass_metadata.fast(sc); address_t slab_start = addr & ~meta.slab_mask; @@ -360,17 +361,17 @@ namespace snmalloc } } - inline static size_t index_in_object(sizeclass_t sc, address_t addr) + constexpr size_t index_in_object(sizeclass_t sc, address_t addr) { return addr - start_of_object(sc, addr); } - inline static size_t remaining_bytes(sizeclass_t sc, address_t addr) + constexpr size_t remaining_bytes(sizeclass_t sc, address_t addr) { return sizeclass_metadata.fast(sc).size - index_in_object(sc, addr); } - inline static bool is_start_of_object(sizeclass_t sc, address_t addr) + constexpr bool is_start_of_object(sizeclass_t sc, address_t addr) { size_t offset = addr & (sizeclass_full_to_slab_size(sc) - 1); @@ -400,47 +401,47 @@ namespace snmalloc return bits::next_pow2_bits(size) - MIN_CHUNK_BITS; } - constexpr static SNMALLOC_PURE size_t sizeclass_lookup_index(const size_t s) + constexpr SNMALLOC_PURE size_t sizeclass_lookup_index(const size_t s) { // We subtract and shift to reduce the size of the table, i.e. we don't have // to store a value for every size. return (s - 1) >> MIN_ALLOC_BITS; } - static inline smallsizeclass_t size_to_sizeclass(size_t size) + constexpr size_t sizeclass_lookup_size = + sizeclass_lookup_index(MAX_SMALL_SIZECLASS_SIZE); + + /** + * This struct is used to statically initialise a table for looking up + * the correct sizeclass. + */ + struct SizeClassLookup { - constexpr static size_t sizeclass_lookup_size = - sizeclass_lookup_index(MAX_SMALL_SIZECLASS_SIZE); + sizeclass_compress_t table[sizeclass_lookup_size] = {{}}; - /** - * This struct is used to statically initialise a table for looking up - * the correct sizeclass. 
- */ - struct SizeClassLookup + constexpr SizeClassLookup() { - sizeclass_compress_t table[sizeclass_lookup_size] = {{}}; - - constexpr SizeClassLookup() + size_t curr = 1; + for (sizeclass_compress_t sizeclass = 0; + sizeclass < NUM_SMALL_SIZECLASSES; + sizeclass++) { - size_t curr = 1; - for (sizeclass_compress_t sizeclass = 0; - sizeclass < NUM_SMALL_SIZECLASSES; - sizeclass++) + for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; + curr += 1 << MIN_ALLOC_BITS) { - for (; curr <= sizeclass_metadata.fast_small(sizeclass).size; - curr += 1 << MIN_ALLOC_BITS) - { - auto i = sizeclass_lookup_index(curr); - if (i == sizeclass_lookup_size) - break; - table[i] = sizeclass; - } + auto i = sizeclass_lookup_index(curr); + if (i == sizeclass_lookup_size) + break; + table[i] = sizeclass; } } - }; + } + }; - static constexpr SizeClassLookup sizeclass_lookup = SizeClassLookup(); + constexpr SizeClassLookup sizeclass_lookup = SizeClassLookup(); + constexpr smallsizeclass_t size_to_sizeclass(size_t size) + { auto index = sizeclass_lookup_index(size); if (index < sizeclass_lookup_size) { diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.cc index 1e0810b455ca..c40b070e05ee 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.cc @@ -6,8 +6,8 @@ using namespace snmalloc; void get_malloc_info_v1(malloc_info_v1* stats) { - auto curr = Globals::get_current_usage(); - auto peak = Globals::get_peak_usage(); + auto curr = Alloc::Config::Backend::get_current_usage(); + auto peak = Alloc::Config::Backend::get_peak_usage(); stats->current_memory_usage = curr; stats->peak_memory_usage = peak; } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h index 2fc677743fe1..f7429cd11f3d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc-extensions.h @@ -1,3 +1,4 @@ +#pragma once /** * Malloc extensions * diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc index de0eadc9d710..512ba3dceb38 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/malloc.cc @@ -61,6 +61,12 @@ extern "C" return ThreadAlloc::get().alloc_size(ptr); } + SNMALLOC_EXPORT + size_t SNMALLOC_NAME_MANGLE(malloc_good_size)(size_t size) + { + return round_size(size); + } + SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(realloc)(void* ptr, size_t size) { auto& a = ThreadAlloc::get(); @@ -148,6 +154,8 @@ extern "C" } sz = bits::min(sz, a.alloc_size(*ptr)); + + SNMALLOC_ASSUME(*ptr != nullptr || sz == 0); // Guard memcpy as GCC is assuming not nullptr for ptr after the memcpy // otherwise. if (sz != 0) @@ -219,4 +227,22 @@ extern "C" return SNMALLOC_NAME_MANGLE(memalign)( OS_PAGE_SIZE, (size + OS_PAGE_SIZE - 1) & ~(OS_PAGE_SIZE - 1)); } + +#if __has_include() +# include +#endif +#if defined(__GLIBC__) && !defined(SNMALLOC_PASS_THROUGH) + // glibc uses these hooks to replace malloc. + // This is required when RTL_DEEPBIND is used and the library is + // LD_PRELOADed. 
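The SizeClassLookup change above hoists the lookup table to a namespace-scope constexpr object: the table is filled by a constexpr constructor, so mapping a size to its class compiles down to a shift and a table load. The idiom looks like this with toy power-of-two size classes (not snmalloc's real class layout):

#include <cstddef>
#include <cstdint>

constexpr std::size_t MinAllocBits = 4; // 16-byte allocation granularity
constexpr std::size_t MaxSmallSize = 1024;

constexpr std::size_t lookup_index(std::size_t s)
{
  return (s - 1) >> MinAllocBits;
}

constexpr std::size_t LookupSize = lookup_index(MaxSmallSize) + 1;

// Toy size classes: 16, 32, 64, ..., 1024.
constexpr std::uint8_t NumClasses = 7;
constexpr std::size_t class_to_size(std::uint8_t c)
{
  return std::size_t(16) << c;
}

struct Lookup
{
  std::uint8_t table[LookupSize] = {};

  constexpr Lookup()
  {
    std::size_t curr = 1;
    for (std::uint8_t c = 0; c < NumClasses; c++)
      for (; curr <= class_to_size(c); curr += std::size_t(1) << MinAllocBits)
        table[lookup_index(curr)] = c;
  }
};

constexpr Lookup lookup{};

// Valid for 1 <= size <= MaxSmallSize; as in the real code, a size of zero
// must be handled by the caller before indexing.
constexpr std::uint8_t size_to_class(std::size_t size)
{
  return lookup.table[lookup_index(size)];
}

static_assert(size_to_class(1) == 0);    // rounds up to 16
static_assert(size_to_class(17) == 1);   // rounds up to 32
static_assert(size_to_class(1024) == 6); // the largest small class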
+ // See https://github.com/microsoft/snmalloc/issues/595 + SNMALLOC_EXPORT void (*SNMALLOC_NAME_MANGLE(__free_hook))(void* ptr) = + &SNMALLOC_NAME_MANGLE(free); + SNMALLOC_EXPORT void* (*SNMALLOC_NAME_MANGLE(__malloc_hook))(size_t size) = + &SNMALLOC_NAME_MANGLE(malloc); + SNMALLOC_EXPORT void* (*SNMALLOC_NAME_MANGLE(__realloc_hook))( + void* ptr, size_t size) = &SNMALLOC_NAME_MANGLE(realloc); + SNMALLOC_EXPORT void* (*SNMALLOC_NAME_MANGLE(__memalign_hook))( + size_t alignment, size_t size) = &SNMALLOC_NAME_MANGLE(memalign); +#endif } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc index c0acc953addb..29372a7b231d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/new.cc @@ -71,3 +71,51 @@ void operator delete[](void* p, std::nothrow_t&) { ThreadAlloc::get().dealloc(p); } + +void* operator new(size_t size, std::align_val_t val) +{ + size = aligned_size(size_t(val), size); + return ThreadAlloc::get().alloc(size); +} + +void* operator new[](size_t size, std::align_val_t val) +{ + size = aligned_size(size_t(val), size); + return ThreadAlloc::get().alloc(size); +} + +void* operator new(size_t size, std::align_val_t val, std::nothrow_t&) +{ + size = aligned_size(size_t(val), size); + return ThreadAlloc::get().alloc(size); +} + +void* operator new[](size_t size, std::align_val_t val, std::nothrow_t&) +{ + size = aligned_size(size_t(val), size); + return ThreadAlloc::get().alloc(size); +} + +void operator delete(void* p, std::align_val_t)EXCEPTSPEC +{ + ThreadAlloc::get().dealloc(p); +} + +void operator delete[](void* p, std::align_val_t) EXCEPTSPEC +{ + ThreadAlloc::get().dealloc(p); +} + +void operator delete(void* p, size_t size, std::align_val_t val)EXCEPTSPEC +{ + size = aligned_size(size_t(val), size); + ThreadAlloc::get().dealloc(p, size); +} + +void operator delete[](void* p, size_t size, std::align_val_t val) EXCEPTSPEC +{ + if (p == nullptr) + return; + size = aligned_size(size_t(val), size); + ThreadAlloc::get().dealloc(p, size); +} diff --git a/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc b/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc index 8892323203c6..64da984ca71e 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc +++ b/3rdparty/exported/snmalloc/src/snmalloc/override/rust.cc @@ -48,6 +48,6 @@ extern "C" SNMALLOC_EXPORT void* SNMALLOC_NAME_MANGLE(rust_realloc)( extern "C" SNMALLOC_EXPORT void SNMALLOC_NAME_MANGLE(rust_statistics)( size_t* current_memory_usage, size_t* peak_memory_usage) { - *current_memory_usage = Globals::get_current_usage(); - *peak_memory_usage = Globals::get_peak_usage(); + *current_memory_usage = StandardConfig::Backend::get_current_usage(); + *peak_memory_usage = StandardConfig::Backend::get_peak_usage(); } \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal.h index 697b22cb9958..47dde5e498d4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal.h @@ -37,49 +37,42 @@ namespace snmalloc { -#if !defined(OPEN_ENCLAVE) || defined(OPEN_ENCLAVE_SIMULATION) using DefaultPal = -# if defined(_WIN32) +#if defined(SNMALLOC_MEMORY_PROVIDER) + SNMALLOC_MEMORY_PROVIDER; +#elif defined(OPEN_ENCLAVE) + PALOpenEnclave; +#elif defined(_WIN32) PALWindows; -# elif defined(__APPLE__) +#elif defined(__APPLE__) PALApple<>; -# elif 
defined(__linux__) +#elif defined(__linux__) PALLinux; -# elif defined(FreeBSD_KERNEL) +#elif defined(FreeBSD_KERNEL) PALFreeBSDKernel; -# elif defined(__FreeBSD__) +#elif defined(__FreeBSD__) PALFreeBSD; -# elif defined(__HAIKU__) +#elif defined(__HAIKU__) PALHaiku; -# elif defined(__NetBSD__) +#elif defined(__NetBSD__) PALNetBSD; -# elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) PALOpenBSD; -# elif defined(__sun) +#elif defined(__sun) PALSolaris; -# elif defined(__DragonFly__) +#elif defined(__DragonFly__) PALDragonfly; -# else -# error Unsupported platform -# endif -#endif - - using Pal = -#if defined(SNMALLOC_MEMORY_PROVIDER) - PALPlainMixin; -#elif defined(OPEN_ENCLAVE) - PALOpenEnclave; #else - DefaultPal; +# error Unsupported platform #endif [[noreturn]] SNMALLOC_SLOW_PATH inline void error(const char* const str) { - Pal::error(str); + DefaultPal::error(str); } // Used to keep Superslab metadata committed. - static constexpr size_t OS_PAGE_SIZE = Pal::page_size; + static constexpr size_t OS_PAGE_SIZE = DefaultPal::page_size; /** * Perform platform-specific adjustment of return pointers. @@ -88,23 +81,24 @@ namespace snmalloc * disruption to PALs for platforms that do not support StrictProvenance AALs. */ template< - typename PAL = Pal, + typename PAL = DefaultPal, typename AAL = Aal, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) B> + SNMALLOC_CONCEPT(capptr::IsBound) B> static inline typename std::enable_if_t< !aal_supports, CapPtr>> capptr_to_user_address_control(CapPtr p) { - return CapPtr>(p.unsafe_ptr()); + return CapPtr>::unsafe_from( + p.unsafe_ptr()); } template< - typename PAL = Pal, + typename PAL = DefaultPal, typename AAL = Aal, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) B> + SNMALLOC_CONCEPT(capptr::IsBound) B> static SNMALLOC_FAST_PATH typename std::enable_if_t< aal_supports, CapPtr>> @@ -125,7 +119,7 @@ namespace snmalloc typename PAL, bool page_aligned = false, typename T, - SNMALLOC_CONCEPT(capptr::ConceptBound) B> + SNMALLOC_CONCEPT(capptr::IsBound) B> static SNMALLOC_FAST_PATH void pal_zero(CapPtr p, size_t sz) { static_assert( @@ -177,26 +171,15 @@ namespace snmalloc [[noreturn]] inline void report_fatal_error(Args... args) { MessageBuilder msg{std::forward(args)...}; - Pal::error(msg.get_message()); - } - - static inline size_t get_tid() - { - static thread_local size_t tid{0}; - static std::atomic tid_source{1}; - - if (tid == 0) - { - tid = tid_source++; - } - return tid; + DefaultPal::error(msg.get_message()); } template inline void message(Args... args) { MessageBuilder msg{std::forward(args)...}; - MessageBuilder msg_tid{"{}: {}", get_tid(), msg.get_message()}; - Pal::message(msg_tid.get_message()); + MessageBuilder msg_tid{ + "{}: {}", DefaultPal::get_tid(), msg.get_message()}; + DefaultPal::message(msg_tid.get_message()); } } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h index 69f4e5da0efe..f023e195af76 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_apple.h @@ -125,7 +125,7 @@ namespace snmalloc while (madvise(p, size, MADV_FREE_REUSABLE) == -1 && errno == EAGAIN) ; - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) { // This must occur after `MADV_FREE_REUSABLE`. 
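Above, message() now prefixes every line with a thread id supplied by the PAL (DefaultPal::get_tid) instead of a local helper. The underlying technique, which the PalTidDefault class later in this patch also uses, is a lazily assigned, never-zero thread-local id; a self-contained version:

#include <atomic>
#include <cstddef>
#include <cstdio>

// Each thread claims the next id from a shared counter on first use, so the
// id is stable for the thread's lifetime and never the "unassigned" value 0.
static std::size_t get_tid() noexcept
{
  static std::atomic<std::size_t> source{0};
  static thread_local std::size_t tid = ++source;
  return tid;
}

static void message(const char* text)
{
  std::fprintf(stderr, "%zu: %s\n", get_tid(), text);
}

A call such as message("hello") from the first thread to log then prints "1: hello".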
// @@ -180,7 +180,7 @@ } } - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) { // Mark pages as writable for `madvise` below. // @@ -220,7 +220,7 @@ // must be initialized to 0 or addr is interepreted as a lower-bound. mach_vm_address_t addr = 0; - vm_prot_t prot = (state_using || !PalEnforceAccess) ? + vm_prot_t prot = (state_using || !mitigations(pal_enforce_access)) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_NONE; diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd.h index 4689b43c0e53..59bcdc0abf8d 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd.h @@ -40,7 +40,7 @@ namespace snmalloc madvise(p, size, MADV_FREE); - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) { mprotect(p, size, PROT_NONE); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd_aligned.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd_aligned.h index 4c88287f362b..48b28db53dad 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd_aligned.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_bsd_aligned.h @@ -23,7 +23,8 @@ namespace snmalloc static constexpr uint64_t pal_features = AlignedAllocation | PALBSD::pal_features; - static SNMALLOC_CONSTINIT_STATIC size_t minimum_alloc_size = 4096; + static SNMALLOC_CONSTINIT_STATIC size_t minimum_alloc_size = + aal_supports ? 1 << 24 : 4096; /** * Reserve memory at a specific alignment. @@ -37,8 +38,9 @@ namespace snmalloc int log2align = static_cast(bits::next_pow2_bits(size)); - auto prot = - state_using || !PalEnforceAccess ? PROT_READ | PROT_WRITE : PROT_NONE; + auto prot = state_using || !mitigations(pal_enforce_access) ? + PROT_READ | PROT_WRITE : + PROT_NONE; void* p = mmap( nullptr, diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h index 78270f27cc3e..44dec410a000 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_concept.h @@ -19,7 +19,7 @@ namespace snmalloc * PALs must advertize the bit vector of their supported features. */ template - concept ConceptPAL_static_features = requires() + concept IsPAL_static_features = requires() { typename std::integral_constant; }; @@ -28,7 +28,7 @@ namespace snmalloc * PALs must advertise the size of the address space and their page size */ template - concept ConceptPAL_static_sizes = requires() + concept IsPAL_static_sizes = requires() { typename std::integral_constant; typename std::integral_constant; @@ -38,7 +38,7 @@ namespace snmalloc * PALs expose an error reporting function which takes a const C string. */ template - concept ConceptPAL_error = requires(const char* const str) + concept IsPAL_error = requires(const char* const str) { { PAL::error(str) @@ -50,7 +50,7 @@ namespace snmalloc * PALs expose a basic library of memory operations. */ template - concept ConceptPAL_memops = requires(void* vp, std::size_t sz) + concept IsPAL_memops = requires(void* vp, std::size_t sz) { { PAL::notify_not_using(vp, sz) @@ -76,11 +76,25 @@ namespace snmalloc noexcept->ConceptSame; }; + /** + * The Pal must provide a thread id for debugging. It should not return + * the default value of ThreadIdentity, as that value is used to indicate + * the absence of a tid in some places.
+ */ + template + concept IsPAL_tid = requires() + { + { + PAL::get_tid() + } + noexcept->ConceptSame; + }; + /** * Absent any feature flags, the PAL must support a crude primitive allocator */ template - concept ConceptPAL_reserve = requires(PAL p, std::size_t sz) + concept IsPAL_reserve = requires(PAL p, std::size_t sz) { { PAL::reserve(sz) @@ -92,7 +106,7 @@ namespace snmalloc * Some PALs expose a richer allocator which understands aligned allocations */ template - concept ConceptPAL_reserve_aligned = requires(std::size_t sz) + concept IsPAL_reserve_aligned = requires(std::size_t sz) { { PAL::template reserve_aligned(sz) @@ -108,7 +122,7 @@ namespace snmalloc * Some PALs can provide memory pressure callbacks. */ template - concept ConceptPAL_mem_low_notify = requires(PalNotificationObject* pno) + concept IsPAL_mem_low_notify = requires(PalNotificationObject* pno) { { PAL::expensive_low_memory_check() @@ -121,7 +135,7 @@ namespace snmalloc }; template - concept ConceptPAL_get_entropy64 = requires() + concept IsPAL_get_entropy64 = requires() { { PAL::get_entropy64() @@ -135,18 +149,20 @@ namespace snmalloc * requisite claimed pal_features. PALs not claiming particular features * are, naturally, not bound by the corresponding concept. */ + // clang-format off template - concept ConceptPAL = ConceptPAL_static_features&& - ConceptPAL_static_sizes&& ConceptPAL_error&& - ConceptPAL_memops && - (!pal_supports || - ConceptPAL_get_entropy64< - PAL>)&&(!pal_supports || - ConceptPAL_mem_low_notify< - PAL>)&&(pal_supports || - ((!pal_supports || - ConceptPAL_reserve_aligned< - PAL>)&&ConceptPAL_reserve)); + concept IsPAL = + IsPAL_static_features && + IsPAL_static_sizes && + IsPAL_error && + IsPAL_memops && + IsPAL_tid && + (!pal_supports || IsPAL_get_entropy64) && + (!pal_supports || IsPAL_mem_low_notify) && + (pal_supports || + ((!pal_supports || IsPAL_reserve_aligned) && + IsPAL_reserve)); + // clang-format on } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h index 8de4cb09ca3a..83aa52ef2f95 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_consts.h @@ -7,26 +7,6 @@ namespace snmalloc { - /** - * Pal implementations should query this flag to see whether they - * are allowed to optimise memory access, or that they must provide - * exceptions/segfaults if accesses do not obey the - * - using - * - using_readonly - * - not_using - * model. - * - * TODO: There is a known bug in CheriBSD that means round-tripping through - * PROT_NONE sheds capability load and store permissions (while restoring data - * read/write, for added excitement). For the moment, just force this down on - * CHERI. - */ -#if defined(SNMALLOC_CHECK_CLIENT) && !defined(__CHERI_PURE_CAPABILITY__) - static constexpr bool PalEnforceAccess = true; -#else - static constexpr bool PalEnforceAccess = false; -#endif - /** * Flags in a bitfield of optional features that a PAL may support. These * should be set in the PAL's `pal_features` static constexpr field. @@ -103,5 +83,5 @@ namespace snmalloc * Query whether the PAL supports a specific feature. 
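The renamed IsPAL_* concepts above compose a PAL's requirements piecewise. The same shape can be reproduced in a few self-contained lines, using std::same_as in place of snmalloc's ConceptSame and a toy PAL (names here are purely illustrative):

#include <concepts>
#include <cstddef>

// A requirement in the same shape as IsPAL_tid: a static, noexcept get_tid()
// returning the thread-identity type (size_t in this toy version).
template<typename PAL>
concept HasTid = requires()
{
  {
    PAL::get_tid()
  }
  noexcept -> std::same_as<std::size_t>;
};

struct ToyPal
{
  static std::size_t get_tid() noexcept
  {
    return 1;
  }
};

static_assert(HasTid<ToyPal>, "a PAL must expose a usable thread id");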
*/ template - constexpr static bool pal_supports = (PAL::pal_features & F) == F; + static constexpr bool pal_supports = (PAL::pal_features & F) == F; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h index d24f4b7cf8a4..86a6576e49d1 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_freebsd.h @@ -83,7 +83,7 @@ namespace snmalloc madvise(p, size, MADV_NOCORE); madvise(p, size, MADV_FREE); - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) { mprotect(p, size, PROT_NONE); } @@ -124,7 +124,7 @@ namespace snmalloc * manage the address space it references by clearing the SW_VMEM * permission bit. */ - template + template static SNMALLOC_FAST_PATH CapPtr> capptr_to_user_address_control(CapPtr p) { @@ -135,7 +135,7 @@ namespace snmalloc return nullptr; } } - return CapPtr>( + return CapPtr>::unsafe_from( __builtin_cheri_perms_and( p.unsafe_ptr(), ~static_cast(CHERI_PERM_SW_VMEM))); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h index 9cc2b8d48403..bafe23c48781 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_haiku.h @@ -20,7 +20,7 @@ namespace snmalloc * PAL supports. * */ - static constexpr uint64_t pal_features = PALPOSIX::pal_features; + static constexpr uint64_t pal_features = PALPOSIX::pal_features | Entropy; /** * Haiku requires an explicit no-reserve flag in `mmap` to guarantee lazy @@ -37,6 +37,15 @@ namespace snmalloc SNMALLOC_ASSERT(is_aligned_block(p, size)); posix_madvise(p, size, POSIX_MADV_DONTNEED); } + + /** + * Hopefully a temporary workaround until the kernel random feature + * is exposed properly in the userspace ? + */ + static uint64_t get_entropy64() + { + return PALPOSIX::dev_urandom(); + } }; } // namespace snmalloc #endif diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h index 3a06bf8e8bba..6f131b0cc6fd 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_linux.h @@ -8,6 +8,11 @@ # include # include # include +// __has_include does not reliably determine if we actually have linux/random.h +// available +# if defined(SNMALLOC_HAS_LINUX_RANDOM_H) +# include +# endif extern "C" int puts(const char* str); @@ -35,6 +40,19 @@ namespace snmalloc */ static constexpr int default_mmap_flags = MAP_NORESERVE; + /** + * MADV_FREE is only available since Linux 4.5. 
+ * + * Fallback to MADV_DONTNEED on older kernels + */ + static constexpr int madvise_free_flags = +# if defined(MADV_FREE) + MADV_FREE +# else + MADV_DONTNEED +# endif + ; + static void* reserve(size_t size) noexcept { void* p = PALPOSIX::reserve(size); @@ -108,9 +126,9 @@ namespace snmalloc memset(p, 0x5a, size); madvise(p, size, MADV_DONTDUMP); - madvise(p, size, MADV_FREE); + madvise(p, size, madvise_free_flags); - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) { mprotect(p, size, PROT_NONE); } diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h index 75394704f895..6e91d98bf266 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_netbsd.h @@ -4,6 +4,7 @@ # include "pal_bsd_aligned.h" # include +# include /** * We skip the pthread cancellation checkpoints by reaching directly @@ -42,12 +43,20 @@ namespace snmalloc PALBSD_Aligned::pal_features | Entropy; /** - * Temporary solution while waiting getrandom support for the next release + * Temporary solution for NetBSD < 10 * random_device seems unimplemented in clang for this platform + * otherwise using getrandom */ static uint64_t get_entropy64() { +# if defined(SYS_getrandom) + uint64_t result; + if (getrandom(&result, sizeof(result), 0) != sizeof(result)) + error("Failed to get system randomness"); + return result; +# else return PALPOSIX::dev_urandom(); +# endif } }; } // namespace snmalloc diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h index d44d1495b0c3..94bc61e020b5 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_noalloc.h @@ -17,7 +17,7 @@ namespace snmalloc * The minimal subset of a PAL that we need for delegation */ template - concept PALNoAllocBase = ConceptPAL_static_sizes&& ConceptPAL_error; + concept PALNoAllocBase = IsPAL_static_sizes&& IsPAL_error; #endif /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h index 276af8219c8f..be0f141beca4 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_open_enclave.h @@ -1,6 +1,7 @@ #pragma once #include "pal_noalloc.h" +#include "pal_tid_default.h" #ifdef OPEN_ENCLAVE extern "C" void* oe_memset_s(void* p, size_t p_size, int c, size_t size); @@ -25,7 +26,7 @@ namespace snmalloc using OpenEnclaveBasePAL = PALNoAlloc; - class PALOpenEnclave : public OpenEnclaveBasePAL + class PALOpenEnclave : public OpenEnclaveBasePAL, public PalTidDefault { public: /** diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h index 58b3c1f3bd9e..6c9ae05e85af 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_posix.h @@ -1,6 +1,7 @@ #pragma once #include "../aal/aal.h" +#include "pal_tid_default.h" #include "pal_timer_default.h" #if defined(SNMALLOC_BACKTRACE_HEADER) # include SNMALLOC_BACKTRACE_HEADER @@ -38,7 +39,8 @@ namespace snmalloc * working when an early-malloc error appears. 
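The NetBSD and Linux PAL changes above prefer getrandom() when the libc exposes it and fall back to /dev/urandom otherwise. The general pattern, sketched here for Linux (NetBSD 10 and later is similar) with error handling reduced to returning zero, which real code should replace with a proper failure path:

#include <cstdint>
#include <cstdio>
#if defined(__linux__) && __has_include(<sys/random.h>)
#  include <sys/random.h>
#  define SKETCH_HAVE_GETRANDOM 1
#endif

static std::uint64_t entropy64()
{
  std::uint64_t result = 0;
#ifdef SKETCH_HAVE_GETRANDOM
  // With flags == 0, getrandom() blocks only until the kernel entropy pool
  // has been initialised, then returns the requested bytes.
  if (
    getrandom(&result, sizeof(result), 0) ==
    static_cast<ssize_t>(sizeof(result)))
    return result;
#endif
  // Fallback: read the same quantity from /dev/urandom.
  if (FILE* f = std::fopen("/dev/urandom", "rb"))
  {
    if (std::fread(&result, sizeof(result), 1, f) != 1)
      result = 0;
    std::fclose(f);
  }
  return result;
}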
*/ template - class PALPOSIX : public PalTimerDefaultImpl> + class PALPOSIX : public PalTimerDefaultImpl>, + public PalTidDefault { /** * Helper class to access the `default_mmap_flags` field of `OS` if one @@ -202,7 +204,7 @@ namespace snmalloc { SNMALLOC_ASSERT(is_aligned_block(p, size)); - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) { // Fill memory so that when we switch the pages back on we don't make // assumptions on the content. @@ -230,7 +232,7 @@ namespace snmalloc SNMALLOC_ASSERT( is_aligned_block(p, size) || (zero_mem == NoZero)); - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) mprotect(p, size, PROT_READ | PROT_WRITE); else { @@ -251,7 +253,7 @@ namespace snmalloc { SNMALLOC_ASSERT(is_aligned_block(p, size)); - if constexpr (PalEnforceAccess) + if constexpr (mitigations(pal_enforce_access)) mprotect(p, size, PROT_READ); else { @@ -324,7 +326,8 @@ namespace snmalloc // If enforcing access, map pages initially as None, and then // add permissions as required. Otherwise, immediately give all // access as this is the most efficient to implement. - auto prot = PalEnforceAccess ? PROT_NONE : PROT_READ | PROT_WRITE; + auto prot = + mitigations(pal_enforce_access) ? PROT_NONE : PROT_READ | PROT_WRITE; void* p = mmap( nullptr, diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_tid_default.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_tid_default.h new file mode 100644 index 000000000000..678af98b0d5b --- /dev/null +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_tid_default.h @@ -0,0 +1,30 @@ +#pragma once + +#include + +namespace snmalloc +{ + class PalTidDefault + { + public: + using ThreadIdentity = size_t; + + /** + * @brief Get an id for the current thread. + * + * @return the thread id; this should never be the default of + * ThreadIdentity. Callers can assume it is a non-default value.
+ */ + static inline ThreadIdentity get_tid() noexcept + { + static thread_local size_t tid{0}; + static std::atomic tid_source{0}; + + if (tid == 0) + { + tid = ++tid_source; + } + return tid; + } + }; +} // namespace snmalloc \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h index 2d6f822836c4..2ab0bfc1ff26 100644 --- a/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h +++ b/3rdparty/exported/snmalloc/src/snmalloc/pal/pal_windows.h @@ -1,6 +1,7 @@ #pragma once #include "../aal/aal.h" +#include "pal_tid_default.h" #include "pal_timer_default.h" #ifdef _WIN32 @@ -26,7 +27,8 @@ namespace snmalloc { - class PALWindows : public PalTimerDefaultImpl + class PALWindows : public PalTimerDefaultImpl, + public PalTidDefault { /** * A flag indicating that we have tried to register for low-memory diff --git a/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc b/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc new file mode 100644 index 000000000000..4efaec6b7a42 --- /dev/null +++ b/3rdparty/exported/snmalloc/src/test/func/cheri/cheri.cc @@ -0,0 +1,281 @@ +#include + +#if defined(SNMALLOC_PASS_THROUGH) || !defined(__CHERI_PURE_CAPABILITY__) +// This test does not make sense in pass-through or w/o CHERI +int main() +{ + return 0; +} +#else + +// # define SNMALLOC_TRACING + +# include +# include +# include + +# if defined(__FreeBSD__) +# include +# endif + +using namespace snmalloc; + +bool cap_len_is(void* cap, size_t expected) +{ + return __builtin_cheri_length_get(cap) == expected; +} + +bool cap_vmem_perm_is(void* cap, bool expected) +{ +# if defined(CHERI_PERM_SW_VMEM) + return !!(__builtin_cheri_perms_get(cap) & CHERI_PERM_SW_VMEM) == expected; +# else +# warning "Don't know how to check VMEM permission bit" +# endif +} + +int main() +{ + +# if defined(__FreeBSD__) + { + size_t pagesize[8]; + int err = getpagesizes(pagesize, sizeof(pagesize) / sizeof(pagesize[0])); + SNMALLOC_CHECK(err > 0); + SNMALLOC_CHECK(pagesize[0] == OS_PAGE_SIZE); + } +# endif + + auto alloc = get_scoped_allocator(); + + message("Grab small object"); + { + static const size_t sz = 128; + void* o1 = alloc->alloc(sz); + SNMALLOC_CHECK(cap_len_is(o1, sz)); + SNMALLOC_CHECK(cap_vmem_perm_is(o1, false)); + alloc->dealloc(o1); + } + + /* + * This large object is sized to end up in our alloc's local buddy allocators + * when it's released. 
+ */ + message("Grab large object"); + ptraddr_t alarge; + { + static const size_t sz = 1024 * 1024; + void* olarge = alloc->alloc(sz); + alarge = address_cast(olarge); + SNMALLOC_CHECK(cap_len_is(olarge, sz)); + SNMALLOC_CHECK(cap_vmem_perm_is(olarge, false)); + + static_cast(olarge)[128] = 'x'; + static_cast(olarge)[128 + OS_PAGE_SIZE] = 'y'; + +# if defined(__FreeBSD__) + static constexpr int irm = + MINCORE_INCORE | MINCORE_REFERENCED | MINCORE_MODIFIED; + char ic[2]; + int err = mincore(olarge, 2 * OS_PAGE_SIZE, ic); + SNMALLOC_CHECK(err == 0); + SNMALLOC_CHECK((ic[0] & irm) == irm); + SNMALLOC_CHECK((ic[1] & irm) == irm); + message("Large object in core; good"); +# endif + + alloc->dealloc(olarge); + } + + message("Grab large object again, verify reuse"); + { + static const size_t sz = 1024 * 1024; + errno = 0; + void* olarge = alloc->alloc(sz); + int err = errno; + + SNMALLOC_CHECK(alarge == address_cast(olarge)); + SNMALLOC_CHECK(err == 0); + +# if defined(__FreeBSD__) + /* + * Verify that the zeroing took place by mmap, which should mean that the + * first two pages are not in core. This implies that snmalloc successfully + * re-derived a Chunk- or Arena-bounded pointer and used that, and its VMAP + * permission, to tear pages out of the address space. + */ + static constexpr int irm = + MINCORE_INCORE | MINCORE_REFERENCED | MINCORE_MODIFIED; + char ic[2]; + err = mincore(olarge, 2 * OS_PAGE_SIZE, ic); + SNMALLOC_CHECK(err == 0); + SNMALLOC_CHECK((ic[0] & irm) == 0); + SNMALLOC_CHECK((ic[1] & irm) == 0); + message("Large object not in core; good"); +# endif + + SNMALLOC_CHECK(static_cast(olarge)[128] == '\0'); + SNMALLOC_CHECK(static_cast(olarge)[128 + OS_PAGE_SIZE] == '\0'); + SNMALLOC_CHECK(cap_len_is(olarge, sz)); + SNMALLOC_CHECK(cap_vmem_perm_is(olarge, false)); + + alloc->dealloc(olarge); + } + + /* + * Grab another CoreAlloc pointer from the pool and examine it. + * + * CoreAlloc-s come from the metadata pools of snmalloc, and so do not flow + * through the usual allocation machinery. + */ + message("Grab CoreAlloc from pool for inspection"); + { + static_assert( + std::is_same_v>); + + LocalCache lc{&StandardConfig::unused_remote}; + auto* ca = AllocPool::acquire(&lc); + + SNMALLOC_CHECK(cap_len_is(ca, sizeof(*ca))); + SNMALLOC_CHECK(cap_vmem_perm_is(ca, false)); + + /* + * Putting ca back into the pool would require unhooking our local cache, + * and that requires accessing privates. Since it's pretty harmless to do + * so here at the end of our test, just leak it. + */ + } + + /* + * Verify that our memcpy implementation successfully copies capabilities + * even when it is given a region that is not capability-aligned. + */ + message("Checking memcpy behaviors"); + { + static constexpr size_t ncaps = 16; + + int* icaps[ncaps]; + + for (size_t i = 0; i < ncaps; i++) + { + icaps[i] = (int*)&icaps[i]; + SNMALLOC_CHECK(__builtin_cheri_tag_get(icaps[i])); + } + + int* ocaps[ncaps]; + + /* + * While it may seem trivial, check the both-aligned case, both for one + * and for many capabilities. 
+ */ + bzero(ocaps, sizeof(ocaps)); + snmalloc::memcpy(ocaps, icaps, sizeof(void*)); + SNMALLOC_CHECK(__builtin_cheri_tag_get(ocaps[0])); + SNMALLOC_CHECK(__builtin_cheri_equal_exact(icaps[0], ocaps[0])); + + bzero(ocaps, sizeof(ocaps)); + snmalloc::memcpy(ocaps, icaps, sizeof(icaps)); + for (size_t i = 0; i < ncaps; i++) + { + SNMALLOC_CHECK(__builtin_cheri_tag_get(ocaps[i])); + SNMALLOC_CHECK(__builtin_cheri_equal_exact(icaps[i], ocaps[i])); + } + + /* + * When both input and output are equally misaligned, we should preserve + * caps that aren't sheared by the copy. The size of this copy is also + * "unnatural", which should guarantee that any memcpy implementation that + * tries the overlapping-misaligned-sizeof(long)-at-the-end dance corrupts + * the penultimate capability by overwriting it with (identical) data. + * + * Probe a misaligned copy of bytes followed by a zero or more pointers + * followed by bytes. + */ + for (size_t pre = 1; pre < sizeof(int*); pre++) + { + for (size_t post = 0; post < sizeof(int*); post++) + { + for (size_t ptrs = 0; ptrs < ncaps - 2; ptrs++) + { + bzero(ocaps, sizeof(ocaps)); + + snmalloc::memcpy( + pointer_offset(ocaps, pre), + pointer_offset(icaps, pre), + (ptrs + 1) * sizeof(int*) - pre + post); + + /* prefix */ + SNMALLOC_CHECK( + memcmp( + pointer_offset(icaps, pre), + pointer_offset(ocaps, pre), + sizeof(int*) - pre) == 0); + /* pointer */ + for (size_t p = 0; p < ptrs; p++) + { + SNMALLOC_CHECK(__builtin_cheri_tag_get(ocaps[1 + p])); + SNMALLOC_CHECK( + __builtin_cheri_equal_exact(icaps[1 + p], ocaps[1 + p])); + } + /* suffix */ + SNMALLOC_CHECK(memcmp(&icaps[1 + ptrs], &ocaps[1 + ptrs], post) == 0); + } + } + } + + /* + * If the alignments are different, then the bytes should get copied but + * the tags should be cleared. 
+ */ + for (size_t sa = 0; sa < sizeof(int*); sa++) + { + for (size_t da = 0; da < sizeof(int*); da++) + { + static constexpr size_t n = 4; + + if (sa == da) + { + continue; + } + + bzero(ocaps, n * sizeof(int*)); + + snmalloc::memcpy( + pointer_offset(ocaps, da), + pointer_offset(icaps, sa), + n * sizeof(int*) - da - sa); + + for (size_t i = 0; i < n; i++) + { + SNMALLOC_CHECK(__builtin_cheri_tag_get(ocaps[i]) == 0); + } + + SNMALLOC_CHECK( + memcmp( + pointer_offset(icaps, sa), + pointer_offset(ocaps, da), + n * sizeof(int*) - da - sa) == 0); + } + } + } + + message("Verify sizeclass representability"); + { + for (size_t sc = 0; sc < NUM_SMALL_SIZECLASSES; sc++) + { + size_t sz = sizeclass_full_to_size(sizeclass_t::from_small_class(sc)); + SNMALLOC_CHECK(sz == Aal::capptr_size_round(sz)); + } + + for (size_t sc = 0; sc < bits::BITS; sc++) + { + size_t sz = sizeclass_full_to_size(sizeclass_t::from_large_class(sc)); + SNMALLOC_CHECK(sz == Aal::capptr_size_round(sz)); + } + } + + message("CHERI checks OK"); + return 0; +} + +#endif diff --git a/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc b/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc index 726a2f3945c5..03cc9ba3bdf6 100644 --- a/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc +++ b/3rdparty/exported/snmalloc/src/test/func/domestication/domestication.cc @@ -11,20 +11,46 @@ int main() // # define SNMALLOC_TRACING # include +# include +# include # include // Specify type of allocator # define SNMALLOC_PROVIDE_OWN_CONFIG namespace snmalloc { - class CustomGlobals : public BackendAllocator + class CustomConfig : public CommonConfig { public: - using GlobalPoolState = PoolState>; + using Pal = DefaultPal; + using PagemapEntry = DefaultPagemapEntry; private: - using Backend = BackendAllocator; + using ConcretePagemap = + FlatPagemap; + public: + using Pagemap = BasicPagemap; + + using ConcreteAuthmap = + FlatPagemap(), capptr::Arena, Pal, false>; + + using Authmap = DefaultAuthmap; + + public: + using Base = Pipe< + PalRange, + PagemapRegisterRange, + PagemapRegisterRange>; + + using LocalState = StandardLocalState; + + using GlobalPoolState = PoolState>; + + using Backend = + BackendAllocator; + + private: SNMALLOC_REQUIRE_CONSTINIT inline static GlobalPoolState alloc_pool; @@ -61,11 +87,11 @@ namespace snmalloc static inline uintptr_t domesticate_patch_value; /* Verify that a pointer points into the region managed by this config */ - template + template static CapPtr< T, typename B::template with_wildness> - capptr_domesticate(typename Backend::LocalState*, CapPtr p) + capptr_domesticate(LocalState*, CapPtr p) { domesticate_count++; @@ -85,17 +111,17 @@ namespace snmalloc { std::cout << "Patching over corruption" << std::endl; *domesticate_patch_location = domesticate_patch_value; - snmalloc::CustomGlobals::domesticate_patch_location = nullptr; + snmalloc::CustomConfig::domesticate_patch_location = nullptr; } return CapPtr< T, - typename B::template with_wildness>( - p.unsafe_ptr()); + typename B::template with_wildness>:: + unsafe_from(p.unsafe_ptr()); } }; - using Alloc = LocalAllocator; + using Alloc = LocalAllocator; } # define SNMALLOC_NAME_MANGLE(a) test_##a @@ -103,12 +129,16 @@ namespace snmalloc int main() { - snmalloc::CustomGlobals::init(); // init pagemap - snmalloc::CustomGlobals::domesticate_count = 0; + static constexpr bool pagemap_randomize = + mitigations(random_pagemap) & !aal_supports; + + snmalloc::CustomConfig::Pagemap::concretePagemap.init(); + 
snmalloc::CustomConfig::Authmap::init(); + snmalloc::CustomConfig::domesticate_count = 0; LocalEntropy entropy; - entropy.init(); - key_global = FreeListKey(entropy.get_free_list_key()); + entropy.init(); + RemoteAllocator::key_global = FreeListKey(entropy.get_free_list_key()); auto alloc1 = new Alloc(); @@ -123,21 +153,20 @@ int main() alloc2->flush(); // Clobber the linkage but not the back pointer - snmalloc::CustomGlobals::domesticate_patch_location = + snmalloc::CustomConfig::domesticate_patch_location = static_cast(p); - snmalloc::CustomGlobals::domesticate_patch_value = - *static_cast(p); + snmalloc::CustomConfig::domesticate_patch_value = *static_cast(p); memset(p, 0xA5, sizeof(void*)); - snmalloc::CustomGlobals::domesticate_trace = true; - snmalloc::CustomGlobals::domesticate_count = 0; + snmalloc::CustomConfig::domesticate_trace = true; + snmalloc::CustomConfig::domesticate_count = 0; // Open a new slab, so that slow path will pick up the message queue. That // means this should be a sizeclass we've not used before, even internally. auto q = alloc1->alloc(512); std::cout << "Allocated q " << q << std::endl; - snmalloc::CustomGlobals::domesticate_trace = false; + snmalloc::CustomConfig::domesticate_trace = false; /* * Expected domestication calls in the above message passing: @@ -152,8 +181,8 @@ int main() * after q). */ static constexpr size_t expected_count = - snmalloc::CustomGlobals::Options.QueueHeadsAreTame ? 2 : 3; - SNMALLOC_CHECK(snmalloc::CustomGlobals::domesticate_count == expected_count); + snmalloc::CustomConfig::Options.QueueHeadsAreTame ? 2 : 3; + SNMALLOC_CHECK(snmalloc::CustomConfig::domesticate_count == expected_count); // Prevent the allocators from going out of scope during the above test alloc1->flush(); diff --git a/3rdparty/exported/snmalloc/src/test/func/fixed_region/fixed_region.cc b/3rdparty/exported/snmalloc/src/test/func/fixed_region/fixed_region.cc index 0a9962731fee..2c00c7b8cb59 100644 --- a/3rdparty/exported/snmalloc/src/test/func/fixed_region/fixed_region.cc +++ b/3rdparty/exported/snmalloc/src/test/func/fixed_region/fixed_region.cc @@ -11,7 +11,7 @@ using namespace snmalloc; -using CustomGlobals = FixedGlobals>; +using CustomGlobals = FixedRangeConfig>; using FixedAlloc = LocalAllocator; int main() @@ -23,8 +23,8 @@ int main() // It is also large enough for the example to run in. // For 1MiB superslabs, SUPERSLAB_BITS + 4 is not big enough for the example. auto size = bits::one_at_bit(28); - auto oe_base = Pal::reserve(size); - Pal::notify_using(oe_base, size); + auto oe_base = DefaultPal::reserve(size); + DefaultPal::notify_using(oe_base, size); auto oe_end = pointer_offset(oe_base, size); std::cout << "Allocated region " << oe_base << " - " << pointer_offset(oe_base, size) << std::endl; diff --git a/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc b/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc index 04b883973334..1d4c31da9e07 100644 --- a/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc +++ b/3rdparty/exported/snmalloc/src/test/func/malloc/malloc.cc @@ -32,7 +32,7 @@ void check_result(size_t size, size_t align, void* p, int err, bool null) failed = true; } const auto alloc_size = our_malloc_usable_size(p); - auto expected_size = round_size(size); + auto expected_size = our_malloc_good_size(size); #ifdef SNMALLOC_PASS_THROUGH // Calling system allocator may allocate a larger block than // snmalloc. 
Note, we have called the system allocator with @@ -55,6 +55,14 @@ void check_result(size_t size, size_t align, void* p, int err, bool null) INFO("Cheri size is {}, but required to be {}.", cheri_size, alloc_size); failed = true; } +# if defined(CHERI_PERM_SW_VMEM) + const auto cheri_perms = __builtin_cheri_perms_get(p); + if (cheri_perms & CHERI_PERM_SW_VMEM) + { + INFO("Cheri permissions include VMEM authority"); + failed = true; + } +# endif if (p != nullptr) { /* @@ -367,6 +375,6 @@ int main(int argc, char** argv) our_malloc_usable_size(nullptr) == 0, "malloc_usable_size(nullptr) should be zero"); - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); return 0; } diff --git a/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc b/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc index 57cafc235c95..2a2ada2eef07 100644 --- a/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc +++ b/3rdparty/exported/snmalloc/src/test/func/memory/memory.cc @@ -184,7 +184,7 @@ void test_calloc() alloc.dealloc(p, size); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_double_alloc() @@ -229,7 +229,7 @@ void test_double_alloc() } } } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer() @@ -273,7 +273,7 @@ void test_external_pointer() alloc.dealloc(p1, size); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); }; void check_offset(void* base, void* interior) @@ -305,7 +305,7 @@ void test_external_pointer_large() auto& alloc = ThreadAlloc::get(); - constexpr size_t count_log = Pal::address_bits > 32 ? 5 : 3; + constexpr size_t count_log = DefaultPal::address_bits > 32 ? 5 : 3; constexpr size_t count = 1 << count_log; // Pre allocate all the objects size_t* objects[count]; @@ -476,7 +476,7 @@ void test_remaining_bytes() char* p = (char*)alloc.alloc(size); for (size_t offset = 0; offset < size; offset++) { - auto rem = alloc.remaining_bytes(p + offset); + auto rem = alloc.remaining_bytes(address_cast(pointer_offset(p, offset))); if (rem != (size - offset)) { printf( diff --git a/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc b/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc index 1f00ff8af43c..dca7bf382990 100644 --- a/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc +++ b/3rdparty/exported/snmalloc/src/test/func/pagemap/pagemap.cc @@ -56,7 +56,8 @@ void set(bool bounded, address_t address, T new_value) void test_pagemap(bool bounded) { address_t low = bits::one_at_bit(23); - address_t high = bits::one_at_bit(30); + address_t high = bits::one_at_bit(29); + void* base = nullptr; // Nullptr needs to work before initialisation CHECK_GET(bounded, 0, T()); @@ -64,20 +65,26 @@ void test_pagemap(bool bounded) // Initialise the pagemap if (bounded) { - auto size = bits::one_at_bit(30); - auto base = Pal::reserve(size); - Pal::notify_using(base, size); + auto size = bits::one_at_bit(29); + base = DefaultPal::reserve(size); + DefaultPal::notify_using(base, size); std::cout << "Fixed base: " << base << " (" << size << ") " << " end: " << pointer_offset(base, size) << std::endl; auto [heap_base, heap_size] = pagemap_test_bound.init(base, size); std::cout << "Heap base: " << heap_base << " (" << heap_size << ") " << " end: " << pointer_offset(heap_base, heap_size) << std::endl; low = address_cast(heap_base); + base = heap_base; high = low + heap_size; + // Store a pattern in heap. 
+ memset(base, 0x23, high - low); } else { - pagemap_test_unbound.init(); + static constexpr bool pagemap_randomize = + mitigations(random_pagemap) && !aal_supports; + + pagemap_test_unbound.init(); pagemap_test_unbound.register_range(low, high - low); } @@ -99,6 +106,30 @@ void test_pagemap(bool bounded) // Check pattern is correctly stored std::cout << std::endl; + + if (bounded) + { + std::cout << "Checking heap" << std::endl; + // Check we have not corrupted the heap. + for (size_t offset = 0; offset < high - low; offset++) + { + if ((offset % (1ULL << 26)) == 0) + std::cout << "." << std::flush; + auto* p = ((char*)base) + offset; + if (*p != 0x23) + { + printf("Heap and pagemap have collided at %p", p); + abort(); + } + } + + std::cout << std::endl; + std::cout << "Storing new pattern" << std::endl; + // Store a different pattern in heap. + memset(base, 0x23, high - low); + } + + std::cout << "Checking pagemap contents" << std::endl; value = 1; for (address_t ptr = low; ptr < high; ptr += bits::one_at_bit(GRANULARITY_BITS + 3)) diff --git a/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc b/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc index 215c39355a74..7eeff87438cb 100644 --- a/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc +++ b/3rdparty/exported/snmalloc/src/test/func/pool/pool.cc @@ -1,3 +1,5 @@ +#include +#include #include #include #include @@ -12,7 +14,7 @@ struct PoolAEntry : Pooled PoolAEntry() : field(1){}; }; -using PoolA = Pool; +using PoolA = Pool; struct PoolBEntry : Pooled { @@ -22,14 +24,42 @@ struct PoolBEntry : Pooled PoolBEntry(int f) : field(f){}; }; -using PoolB = Pool; +using PoolB = Pool; + +struct PoolLargeEntry : Pooled +{ + std::array payload; + + PoolLargeEntry() + { + printf("."); + fflush(stdout); + payload[0] = 1; + printf("first %d\n", payload[0]); + payload[1'999'999] = 1; + printf("last %d\n", payload[1'999'999]); + }; +}; + +using PoolLarge = Pool; + +template +struct PoolSortEntry : Pooled> +{ + int field; + + PoolSortEntry(int f) : field(f){}; +}; + +template +using PoolSort = Pool, Alloc::Config>; void test_alloc() { auto ptr = PoolA::acquire(); SNMALLOC_CHECK(ptr != nullptr); // Pool allocations should not be visible to debug_check_empty. - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); PoolA::release(ptr); } @@ -80,6 +110,9 @@ void test_double_alloc() SNMALLOC_CHECK(ptr1 != ptr2); PoolA::release(ptr2); auto ptr3 = PoolA::acquire(); + // The following check assumes a stack discipline for acquire/release. + // Placing it first in the list of tests means, there is a single element + // and thus it works for both stack and queue. SNMALLOC_CHECK(ptr2 == ptr3); PoolA::release(ptr1); PoolA::release(ptr3); @@ -116,6 +149,68 @@ void test_iterator() PoolA::release(after_iteration_ptr); } +void test_large() +{ + printf("."); + fflush(stdout); + PoolLargeEntry* p = PoolLarge::acquire(); + printf("."); + fflush(stdout); + PoolLarge::release(p); + printf("."); + fflush(stdout); +} + +/** + * This test confirms that the pool is sorted consistently with + * respect to the iterator after a call to sort. + */ +template +void test_sort() +{ + auto position = [](PoolSortEntry* ptr) { + size_t i = 0; + auto curr = PoolSort::iterate(); + while (ptr != curr) + { + curr = PoolSort::iterate(curr); + ++i; + } + return i; + }; + + // This test checks that `sort` puts the elements in the right order, + // so it is the same as if they had been allocated in that order. 
+ auto a1 = PoolSort::acquire(1); + auto a2 = PoolSort::acquire(1); + + auto position1 = position(a1); + auto position2 = position(a2); + + // Release in either order. + if (order) + { + PoolSort::release(a1); + PoolSort::release(a2); + } + else + { + PoolSort::release(a2); + PoolSort::release(a1); + } + + PoolSort::sort(); + + auto b1 = PoolSort::acquire(1); + auto b2 = PoolSort::acquire(1); + + SNMALLOC_CHECK(position1 == position(b1)); + SNMALLOC_CHECK(position2 == position(b2)); + + PoolSort::release(b1); + PoolSort::release(b2); +} + int main(int argc, char** argv) { setup(); @@ -127,11 +222,23 @@ int main(int argc, char** argv) UNUSED(argc, argv); #endif + test_double_alloc(); + std::cout << "test_double_alloc passed" << std::endl; test_alloc(); + std::cout << "test_alloc passed" << std::endl; test_constructor(); + std::cout << "test_constructor passed" << std::endl; test_alloc_many(); - test_double_alloc(); + std::cout << "test_alloc_many passed" << std::endl; test_different_alloc(); + std::cout << "test_different_alloc passed" << std::endl; test_iterator(); + std::cout << "test_iterator passed" << std::endl; + test_large(); + std::cout << "test_large passed" << std::endl; + test_sort(); + std::cout << "test_sort passed" << std::endl; + test_sort(); + std::cout << "test_sort passed" << std::endl; return 0; } diff --git a/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc b/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc index d83dd33058f0..c8db1cad762a 100644 --- a/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc +++ b/3rdparty/exported/snmalloc/src/test/func/statistics/stats.cc @@ -1,41 +1,118 @@ -#include - +#ifdef SNMALLOC_PASS_THROUGH // This test depends on snmalloc internals int main() { -#ifndef SNMALLOC_PASS_THROUGH // This test depends on snmalloc internals + return 0; +} +#else +# include +# include +# include + +template +void debug_check_empty_1() +{ + std::cout << "debug_check_empty_1 " << size << std::endl; snmalloc::Alloc& a = snmalloc::ThreadAlloc::get(); bool result; - auto r = a.alloc(16); + auto r = a.alloc(size); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { + std::cout << "debug_check_empty failed to detect leaked memory:" << size + << std::endl; abort(); } a.dealloc(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { + std::cout << "debug_check_empty failed to say empty:" << size << std::endl; abort(); } - r = a.alloc(16); + r = a.alloc(size); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != false) { + std::cout << "debug_check_empty failed to detect leaked memory:" << size + << std::endl; abort(); } a.dealloc(r); - snmalloc::debug_check_empty(&result); + snmalloc::debug_check_empty(&result); if (result != true) { + std::cout << "debug_check_empty failed to say empty:" << size << std::endl; abort(); } -#endif } + +template +void debug_check_empty_2() +{ + std::cout << "debug_check_empty_2 " << size << std::endl; + snmalloc::Alloc& a = snmalloc::ThreadAlloc::get(); + bool result; + std::vector allocs; + // 1GB of allocations + size_t count = snmalloc::bits::min(2048, 1024 * 1024 * 1024 / size); + + for (size_t i = 0; i < count; i++) + { + if (i % (count / 16) == 0) + { + std::cout << "." 
<< std::flush; + } + auto r = a.alloc(size); + allocs.push_back(r); + snmalloc::debug_check_empty(&result); + if (result != false) + { + std::cout << "False empty after " << i << " allocations of " << size + << std::endl; + abort(); + } + } + std::cout << std::endl; + + for (size_t i = 0; i < count; i++) + { + if (i % (count / 16) == 0) + { + std::cout << "." << std::flush; + } + snmalloc::debug_check_empty(&result); + if (result != false) + { + std::cout << "False empty after " << i << " deallocations of " << size + << std::endl; + abort(); + } + a.dealloc(allocs[i]); + } + std::cout << std::endl; + snmalloc::debug_check_empty(); +} + +int main() +{ + debug_check_empty_1<16>(); + debug_check_empty_1<16384>(); + debug_check_empty_1<65536>(); + debug_check_empty_1<1024 * 1024 * 32>(); + + debug_check_empty_2<32>(); + debug_check_empty_2<16384>(); + debug_check_empty_2<65535>(); + debug_check_empty_2<1024 * 1024 * 32>(); + + return 0; +} +#endif \ No newline at end of file diff --git a/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc b/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc index b8b1b2313b56..2b10ed8cbcd2 100644 --- a/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc +++ b/3rdparty/exported/snmalloc/src/test/func/thread_alloc_external/thread_alloc_external.cc @@ -12,7 +12,7 @@ namespace snmalloc { - using Alloc = snmalloc::LocalAllocator; + using Alloc = snmalloc::LocalAllocator; } using namespace snmalloc; diff --git a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc index 8bfe4134549e..74996b5178b1 100644 --- a/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc +++ b/3rdparty/exported/snmalloc/src/test/func/two_alloc_types/alloc1.cc @@ -12,7 +12,7 @@ #define SNMALLOC_PROVIDE_OWN_CONFIG namespace snmalloc { - using CustomGlobals = FixedGlobals>; + using CustomGlobals = FixedRangeConfig>; using Alloc = LocalAllocator; } diff --git a/3rdparty/exported/snmalloc/src/test/helpers.h b/3rdparty/exported/snmalloc/src/test/helpers.h index 7e11ba2acba7..30f6e4655e6f 100644 --- a/3rdparty/exported/snmalloc/src/test/helpers.h +++ b/3rdparty/exported/snmalloc/src/test/helpers.h @@ -19,7 +19,7 @@ namespace snmalloc { \ current_test = __PRETTY_FUNCTION__; \ MessageBuilder<1024> mb{"Starting test: " msg "\n", ##__VA_ARGS__}; \ - Pal::message(mb.get_message()); \ + DefaultPal::message(mb.get_message()); \ } while (0) /** @@ -33,7 +33,7 @@ namespace snmalloc do \ { \ MessageBuilder<1024> mb{msg "\n", ##__VA_ARGS__}; \ - Pal::message(mb.get_message()); \ + DefaultPal::message(mb.get_message()); \ } while (0) } diff --git a/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc b/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc index bcea629d2958..e266f0491020 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/contention/contention.cc @@ -154,7 +154,7 @@ void test_tasks(size_t num_tasks, size_t count, size_t size) } #ifndef NDEBUG - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); #endif }; diff --git a/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc b/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc index b2509a65e478..be3306cba00b 100644 --- 
a/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/external_pointer/externalpointer.cc @@ -19,7 +19,7 @@ namespace test { size_t rand = (size_t)r.next(); size_t offset = bits::clz(rand); - if constexpr (Pal::address_bits > 32) + if constexpr (DefaultPal::address_bits > 32) { if (offset > 30) offset = 30; @@ -47,7 +47,7 @@ namespace test alloc.dealloc(objects[i]); } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } void test_external_pointer(xoroshiro::p128r64& r) diff --git a/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc b/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc index 7642d60cc32c..e3bee7d2c784 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/memcpy/memcpy.cc @@ -90,7 +90,12 @@ void memcpy_unchecked(void* dst, const void* src, size_t size) NOINLINE void memcpy_platform_checked(void* dst, const void* src, size_t size) { - check_bounds(dst, size, ""); + if (SNMALLOC_UNLIKELY(!check_bounds(dst, size))) + { + report_fatal_bounds_error(dst, size, ""); + return; + } + memcpy(dst, src, size); } @@ -101,15 +106,18 @@ int main(int argc, char** argv) bool full_test = opt.has("--full_test"); // size_t size = 0; - auto mc1 = [](void* dst, const void* src, size_t len) { + auto mc_platform_checked = [](void* dst, const void* src, size_t len) { memcpy_platform_checked(dst, src, len); }; - auto mc2 = [](void* dst, const void* src, size_t len) { + auto mc_sn = [](void* dst, const void* src, size_t len) { memcpy_unchecked(dst, src, len); }; - auto mc3 = [](void* dst, const void* src, size_t len) { + auto mc_platform = [](void* dst, const void* src, size_t len) { memcpy(dst, src, len); }; + auto mc_sn_checked = [](void* dst, const void* src, size_t len) { + memcpy_checked(dst, src, len); + }; std::vector sizes; for (size_t size = 0; size < 64; size++) @@ -137,11 +145,10 @@ int main(int argc, char** argv) sizes.push_back(size + 5); } - std::vector> stats_checked; - std::vector> stats_unchecked; - std::vector> stats_platform; + std::vector> stats_sn, + stats_sn_checked, stats_platform, stats_platform_checked; - printf("size, checked, unchecked, platform\n"); + printf("size, sn, sn-checked, platform, platform-checked\n"); size_t repeats = full_test ? 
80 : 1; @@ -149,21 +156,25 @@ int main(int argc, char** argv) { for (auto copy_size : sizes) { - test(copy_size, mc1, stats_checked); - test(copy_size, mc2, stats_unchecked); - test(copy_size, mc3, stats_platform); + test(copy_size, mc_platform_checked, stats_platform_checked); + test(copy_size, mc_sn, stats_sn); + test(copy_size, mc_platform, stats_platform); + test(copy_size, mc_sn_checked, stats_sn_checked); } - for (size_t i = 0; i < stats_checked.size(); i++) + for (size_t i = 0; i < stats_sn.size(); i++) { - auto& s1 = stats_checked[i]; - auto& s2 = stats_unchecked[i]; + auto& s1 = stats_sn[i]; + auto& s2 = stats_sn_checked[i]; auto& s3 = stats_platform[i]; + auto& s4 = stats_platform_checked[i]; std::cout << s1.first << ", " << s1.second.count() << ", " - << s2.second.count() << ", " << s3.second.count() << std::endl; + << s2.second.count() << ", " << s3.second.count() << ", " + << s4.second.count() << std::endl; } - stats_checked.clear(); - stats_unchecked.clear(); + stats_sn.clear(); + stats_sn_checked.clear(); stats_platform.clear(); + stats_platform_checked.clear(); } #else snmalloc::UNUSED(opt); diff --git a/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc b/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc index 29595230d886..b93dcd428a2a 100644 --- a/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc +++ b/3rdparty/exported/snmalloc/src/test/perf/singlethread/singlethread.cc @@ -60,7 +60,7 @@ void test_alloc_dealloc(size_t count, size_t size, bool write) } } - snmalloc::debug_check_empty(); + snmalloc::debug_check_empty(); } int main(int, char**) diff --git a/3rdparty/exported/snmalloc/src/test/setup.h b/3rdparty/exported/snmalloc/src/test/setup.h index 642720ddf532..61f9a991c178 100644 --- a/3rdparty/exported/snmalloc/src/test/setup.h +++ b/3rdparty/exported/snmalloc/src/test/setup.h @@ -64,7 +64,7 @@ void print_stack_trace() void _cdecl error(int signal) { snmalloc::UNUSED(signal); - snmalloc::Pal::message("*****ABORT******"); + snmalloc::DefaultPal::message("*****ABORT******"); print_stack_trace(); @@ -75,7 +75,7 @@ LONG WINAPI VectoredHandler(struct _EXCEPTION_POINTERS* ExceptionInfo) { snmalloc::UNUSED(ExceptionInfo); - snmalloc::Pal::message("*****UNHANDLED EXCEPTION******"); + snmalloc::DefaultPal::message("*****UNHANDLED EXCEPTION******"); print_stack_trace(); diff --git a/cgmanifest.json b/cgmanifest.json index c111ba357483..cbadb32e27d5 100644 --- a/cgmanifest.json +++ b/cgmanifest.json @@ -60,7 +60,7 @@ "type": "git", "git": { "repositoryUrl": "https://github.com/microsoft/snmalloc", - "commitHash": "d5c732f3c14969d119178cbc8382d592800c5421" + "commitHash": "dc1268886a5d49d38a54e5d1402b5924a71fee0b" } } },