From 9616009103e8793ca10c60deecc4d76971a36999 Mon Sep 17 00:00:00 2001 From: Eric Niebler Date: Fri, 8 Nov 2024 15:18:45 -0800 Subject: [PATCH] make the empty parens after level constructors optional (#2750) For example, what before needed to be written as: ```cpp cudax::make_hierarchy(cudax::block_dims(), cudax::grid_dims()) ``` can now be written as: ```cpp cudax::make_hierarchy(cudax::block_dims, cudax::grid_dims) ``` --------- Co-authored-by: pciolkosz --- .../__hierarchy/hierarchy_dimensions.cuh | 267 ++++++++++-------- .../__hierarchy/hierarchy_levels.cuh | 16 +- .../__hierarchy/level_dimensions.cuh | 18 ++ .../test/hierarchy/hierarchy_custom_types.cu | 31 -- cudax/test/hierarchy/hierarchy_smoke.cu | 4 + docs/repo.toml | 7 + 6 files changed, 177 insertions(+), 166 deletions(-) diff --git a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh index 9e1df486adf..f5e40d81583 100644 --- a/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/hierarchy_dimensions.cuh @@ -11,9 +11,16 @@ #ifndef _CUDAX__HIERARCHY_HIERARCHY_DIMENSIONS #define _CUDAX__HIERARCHY_HIERARCHY_DIMENSIONS +#include +#include +#include +#include #include +#include +#include #include +#include #include #include @@ -32,6 +39,24 @@ struct unknown_unit : public hierarchy_level }; */ +namespace detail +{ +template +_CCCL_NODISCARD _CUDAX_API constexpr auto __as_level(_Level __l) noexcept -> _Level +{ + return __l; +} + +template +_CCCL_NODISCARD _CUDAX_API constexpr auto __as_level(_LevelFn* __fn) noexcept -> decltype(__fn()) +{ + return {}; +} +} // namespace detail + +template +using __level_type_of = typename _Level::level_type; + template struct hierarchy_dimensions_fragment; @@ -44,7 +69,7 @@ namespace detail // Function to sometimes convince the compiler something is a constexpr and not really accessing runtime storage // Mostly a work around for what was addressed in P2280 (c++23) by leveraging the argumentless constructor of extents template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto fool_compiler(const dimensions& ex) +_CCCL_NODISCARD _CUDAX_API constexpr auto fool_compiler(const dimensions& ex) { if constexpr (dimensions::rank_dynamic() == 0) { @@ -62,13 +87,13 @@ struct has_level_helper; template struct has_level_helper> - : public ::cuda::std::_Or<::cuda::std::is_same...> + : public ::cuda::std::__fold_or<::cuda::std::is_same_v>...> {}; // Is this needed? template struct has_level_helper> - : public ::cuda::std::_Or<::cuda::std::is_same...> + : public ::cuda::std::__fold_or<::cuda::std::is_same_v>...> {}; template @@ -79,16 +104,13 @@ template struct has_unit> : ::cuda::std::is_same {}; -template -using level_at_index = typename ::cuda::std::tuple_element>::type; - template struct get_level_helper { template - _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto& operator()(const TopLevel& top, const Levels&... levels) + _CCCL_NODISCARD _CUDAX_API constexpr auto& operator()(const TopLevel& top, const Levels&... levels) { - if constexpr (_CCCL_TRAIT(::cuda::std::is_same, QueryLevel, typename TopLevel::level_type)) + if constexpr (::cuda::std::is_same_v>) { return top; } @@ -103,12 +125,12 @@ struct get_level_helper template _CCCL_INLINE_VAR constexpr bool has_level = - detail::has_level_helper>>::value; + detail::has_level_helper>::value; template _CCCL_INLINE_VAR constexpr bool has_level_or_unit = - detail::has_level_helper>>::value - || detail::has_unit>>::value; + detail::has_level_helper>::value + || detail::has_unit>::value; namespace detail { @@ -116,45 +138,51 @@ template struct can_stack_checker { template - static constexpr bool can_stack = (detail::can_stack_on_top && ...); + using can_stack = ::cuda::std::__fold_and...>; }; -template -constexpr auto hierarchy_dimensions_fragment_reversed(::cuda::std::index_sequence, const Levels&&... ls); +template +_CCCL_INLINE_VAR constexpr bool __can_stack = + can_stack_checker<__level_type_of, + __level_type_of...>::template can_stack<__level_type_of..., LUnit>::value; + +template +_CUDAX_API constexpr auto __reverse_indices(::cuda::std::index_sequence<_Id...>) noexcept +{ + return ::cuda::std::index_sequence<(sizeof...(_Id) - 1 - _Id)...>(); +} -template -_CCCL_NODISCARD constexpr auto make_hierarchy_fragment_reversable(L1&& l1, Levels&&... ls) noexcept +template +struct __make_hierarchy_fragment { - using checker = can_stack_checker::level_type, - typename ::cuda::std::remove_reference_t::level_type...>; - constexpr bool can_stack = - checker::template can_stack::level_type..., LUnit>; - static_assert(can_stack || !Reversed, - "Provided levels can't create a valid hierarchy when stacked in the provided order or reversed"); - if constexpr (can_stack) + template + _CCCL_NODISCARD _CUDAX_TRIVIAL_API static constexpr auto + __apply_reverse(const Levels& ls, ::cuda::std::index_sequence<_Ids...>) noexcept { - return hierarchy_dimensions_fragment(LUnit{}, ::cuda::std::forward(l1), ::cuda::std::forward(ls)...); + return __make_hierarchy_fragment()(::cuda::std::get<_Ids>(ls)...); } - else + + template + _CCCL_NODISCARD _CUDAX_API constexpr auto operator()(const Levels&... ls) const noexcept { - return hierarchy_dimensions_fragment_reversed( - ::cuda::std::index_sequence_for(), - ::cuda::std::forward(l1), - ::cuda::std::forward(ls)...); + if constexpr (__can_stack) + { + return hierarchy_dimensions_fragment(LUnit{}, ls...); + } + else if constexpr (!Reversed) + { + return __apply_reverse(::cuda::std::tie(ls...), __reverse_indices(::cuda::std::index_sequence_for())); + } + else + { + static_assert(__can_stack, + "Provided levels can't create a valid hierarchy when stacked in the provided order or reversed"); + } } -} - -template -_CCCL_NODISCARD constexpr auto -hierarchy_dimensions_fragment_reversed(::cuda::std::index_sequence, Levels&&... ls) -{ - auto tied = ::cuda::std::forward_as_tuple(::cuda::std::forward(ls)...); - return make_hierarchy_fragment_reversable( - ::cuda::std::get(::cuda::std::move(tied))...); -} +}; template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto get_levels_range_end() noexcept +_CCCL_NODISCARD _CUDAX_API constexpr auto get_levels_range_end() noexcept { return ::cuda::std::make_tuple(); } @@ -162,10 +190,10 @@ _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto get_levels_range_end() noexcept // Find LUnit in Levels... and discard the rest // maybe_unused needed for MSVC template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto +_CCCL_NODISCARD _CUDAX_API constexpr auto get_levels_range_end(const LDims& l, [[maybe_unused]] const Levels&... levels) noexcept { - if constexpr (::cuda::std::is_same_v) + if constexpr (::cuda::std::is_same_v>) { return ::cuda::std::make_tuple(); } @@ -177,10 +205,9 @@ get_levels_range_end(const LDims& l, [[maybe_unused]] const Levels&... levels) n // Find the LTop in Levels... and discard the preceeding ones template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto -get_levels_range_start(const LTopDims& ltop, const Levels&... levels) noexcept +_CCCL_NODISCARD _CUDAX_API constexpr auto get_levels_range_start(const LTopDims& ltop, const Levels&... levels) noexcept { - if constexpr (::cuda::std::is_same_v) + if constexpr (::cuda::std::is_same_v>) { return get_levels_range_end(ltop, levels...); } @@ -192,32 +219,32 @@ get_levels_range_start(const LTopDims& ltop, const Levels&... levels) noexcept // Creates a new hierachy from Levels... cutting out levels between LTop and LUnit template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto get_levels_range(const Levels&... levels) noexcept +_CCCL_NODISCARD _CUDAX_API constexpr auto get_levels_range(const Levels&... levels) noexcept { return get_levels_range_start(levels...); } template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto -dims_to_count_helper(const dimensions ex, ::cuda::std::integer_sequence) +_CCCL_NODISCARD _CUDAX_API constexpr auto +dims_to_count_helper(const dimensions& ex, ::cuda::std::index_sequence) { return (ex.extent(Ids) * ...); } template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto dims_to_count(const dimensions& dims) noexcept +_CCCL_NODISCARD _CUDAX_API constexpr auto dims_to_count(const dimensions& dims) noexcept { - return dims_to_count_helper(dims, ::cuda::std::make_integer_sequence{}); + return dims_to_count_helper(dims, ::cuda::std::make_index_sequence{}); } template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto get_level_counts_helper(const Levels&... ls) +_CCCL_NODISCARD _CUDAX_API constexpr auto get_level_counts_helper(const Levels&... ls) { return ::cuda::std::make_tuple(dims_to_count(ls.dims)...); } template -_CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto replace_with_intrinsics_or_constexpr(const Dims& dims) +_CCCL_NODISCARD _CUDAX_API constexpr auto replace_with_intrinsics_or_constexpr(const Dims& dims) { if constexpr (is_core_cuda_hierarchy_level && is_core_cuda_hierarchy_level && Dims::rank_dynamic() != 0) { @@ -239,16 +266,16 @@ template struct hierarchy_extents_helper { template - _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto operator()(const LTopDims& ltop, const Levels&... levels) noexcept + _CCCL_NODISCARD _CUDAX_API constexpr auto operator()(const LTopDims& ltop, const Levels&... levels) noexcept { - using TopLevel = typename LTopDims::level_type; + using TopLevel = __level_type_of; if constexpr (sizeof...(Levels) == 0) { return replace_with_intrinsics_or_constexpr(ltop.dims); } else { - using Unit = typename detail::get_first_level_type::type; + using Unit = ::cuda::std::__type_index_c<0, __level_type_of...>; return dims_product( replace_with_intrinsics_or_constexpr(ltop.dims), (*this)(levels...)); } @@ -268,14 +295,14 @@ struct index_helper template _CCCL_NODISCARD _CCCL_DEVICE constexpr auto operator()(const LTopDims& ltop, const Levels&... levels) noexcept { - using TopLevel = typename LTopDims::level_type; + using TopLevel = __level_type_of; if constexpr (sizeof...(Levels) == 0) { return static_index_hint(ltop.dims, dims_helper::index()); } else { - using Unit = typename detail::get_first_level_type::type; + using Unit = ::cuda::std::__type_index_c<0, __level_type_of...>; auto hinted_index = static_index_hint(ltop.dims, dims_helper::index()); return dims_sum( dims_product(hinted_index, hierarchy_extents_helper()(levels...)), @@ -290,7 +317,7 @@ struct rank_helper template _CCCL_NODISCARD _CCCL_DEVICE constexpr auto operator()(const LTopDims& ltop, const Levels&... levels) noexcept { - using TopLevel = typename LTopDims::level_type; + using TopLevel = __level_type_of; if constexpr (sizeof...(Levels) == 0) { auto hinted_index = static_index_hint(ltop.dims, dims_helper::index()); @@ -298,7 +325,7 @@ struct rank_helper } else { - using Unit = typename detail::get_first_level_type::type; + using Unit = ::cuda::std::__type_index_c<0, __level_type_of...>; auto hinted_index = static_index_hint(ltop.dims, dims_helper::index()); auto level_rank = detail::index_to_linear(hinted_index, ltop.dims); return level_rank * dims_to_count(hierarchy_extents_helper()(levels...)) @@ -344,39 +371,42 @@ struct hierarchy_dimensions_fragment static_assert(::cuda::std::is_base_of_v || ::cuda::std::is_same_v); ::cuda::std::tuple levels; - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const Levels&... ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions_fragment(const Levels&... ls) noexcept : levels(ls...) {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(Levels&&... ls) noexcept - : levels(::cuda::std::forward(ls)...) - {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const BottomUnit&, const Levels&... ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions_fragment(const BottomUnit&, const Levels&... ls) noexcept : levels(ls...) {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const BottomUnit&, Levels&&... ls) noexcept - : levels(::cuda::std::forward(ls)...) - {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(const ::cuda::std::tuple& ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions_fragment(const ::cuda::std::tuple& ls) noexcept : levels(ls) {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment(::cuda::std::tuple&& ls) noexcept - : levels(::cuda::std::forward<::cuda::std::tuple>(ls)) - {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment( - const BottomUnit& unit, const ::cuda::std::tuple& ls) noexcept + _CUDAX_API constexpr hierarchy_dimensions_fragment(const BottomUnit&, const ::cuda::std::tuple& ls) noexcept : levels(ls) {} - _CCCL_HOST_DEVICE constexpr hierarchy_dimensions_fragment( - const BottomUnit& unit, ::cuda::std::tuple&& ls) noexcept - : levels(::cuda::std::forward<::cuda::std::tuple>(ls)) - {} + +# if defined(__cpp_three_way_comparison) && __cpp_three_way_comparison >= 201907 + _CCCL_NODISCARD _CUDAX_API constexpr bool operator==(const hierarchy_dimensions_fragment&) const noexcept = default; +# else + _CCCL_NODISCARD_FRIEND _CUDAX_API constexpr bool + operator==(const hierarchy_dimensions_fragment& left, const hierarchy_dimensions_fragment& right) noexcept + { + return left.levels == right.levels; + } + + _CCCL_NODISCARD_FRIEND _CUDAX_API constexpr bool + operator!=(const hierarchy_dimensions_fragment& left, const hierarchy_dimensions_fragment& right) noexcept + { + return left.levels != right.levels; + } +# endif private: // This being static is a bit of a hack to make extents_type working without incomplete class member access template - _CCCL_NODISCARD _CCCL_HOST_DEVICE static constexpr auto levels_range_static(const decltype(levels)& levels) noexcept + _CCCL_NODISCARD _CUDAX_API static constexpr auto + levels_range_static(const ::cuda::std::tuple& levels) noexcept { static_assert(has_level>); static_assert(has_level_or_unit>); @@ -386,7 +416,7 @@ private: // TODO is this useful enough to expose? template - _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto levels_range() const noexcept + _CCCL_NODISCARD _CUDAX_API constexpr auto levels_range() const noexcept { return levels_range_static(levels); } @@ -395,7 +425,7 @@ private: struct fragment_helper { template - _CCCL_NODISCARD _CCCL_HOST_DEVICE constexpr auto operator()(const Selected&... levels) const noexcept + _CCCL_NODISCARD _CUDAX_API constexpr auto operator()(const Selected&... levels) const noexcept { return hierarchy_dimensions_fragment(levels...); } @@ -403,9 +433,9 @@ private: public: template - using extents_type = - decltype(::cuda::std::apply(::cuda::std::declval>(), - levels_range_static(::cuda::std::declval()))); + using extents_type = decltype(::cuda::std::apply( + ::cuda::std::declval>(), + levels_range_static(::cuda::std::declval<::cuda::std::tuple>()))); /** * @brief Get a fragment of this hierarchy @@ -433,7 +463,7 @@ public: * Type indicating what should be the top most level of the resulting fragment */ template - _CCCL_HOST_DEVICE constexpr auto fragment(const Unit& = Unit(), const Level& = Level()) const noexcept + _CUDAX_API constexpr auto fragment(const Unit& = Unit(), const Level& = Level()) const noexcept { auto selected = levels_range(); // TODO fragment can't do constexpr queries because we use references here, can we create copies of the levels in @@ -473,9 +503,8 @@ public: * @tparam Level * Specifies at what CUDA hierarchy level the extents are requested */ - template ::type::level_type> - _CCCL_HOST_DEVICE constexpr auto extents(const Unit& = Unit(), const Level& = Level()) const noexcept + template >> + _CUDAX_API constexpr auto extents(const Unit& = Unit(), const Level& = Level()) const noexcept { auto selected = levels_range(); return detail::convert_to_query_result(::cuda::std::apply(detail::hierarchy_extents_helper{}, selected)); @@ -516,9 +545,8 @@ public: * @tparam Level * Specifies at what level the count should happen */ - template ::type::level_type> - _CCCL_HOST_DEVICE constexpr auto count(const Unit& = Unit(), const Level& = Level()) const noexcept + template >> + _CUDAX_API constexpr auto count(const Unit& = Unit(), const Level& = Level()) const noexcept { return detail::dims_to_count(extents()); } @@ -554,9 +582,8 @@ public: * @tparam Level * Specifies at what level the count should happen */ - template ::type::level_type> - _CCCL_HOST_DEVICE constexpr static auto static_count(const Unit& = Unit(), const Level& = Level()) noexcept + template >> + _CUDAX_API constexpr static auto static_count(const Unit& = Unit(), const Level& = Level()) noexcept { if constexpr (extents_type::rank_dynamic() == 0) { @@ -607,8 +634,7 @@ public: * @tparam Level * Specifies at what hierarchy level the index is requested */ - template ::type::level_type> + template >> _CCCL_DEVICE constexpr auto index(const Unit& = Unit(), const Level& = Level()) const noexcept { auto selected = levels_range(); @@ -650,8 +676,7 @@ public: * @tparam Level * Specifies at what level the rank is requested */ - template ::type::level_type> + template >> _CCCL_DEVICE constexpr auto rank(const Unit& = Unit(), const Level& = Level()) const noexcept { auto selected = levels_range(); @@ -680,7 +705,7 @@ public: * Specifies the requested level */ template - _CCCL_HOST_DEVICE constexpr auto level(const Level&) const noexcept + _CUDAX_API constexpr auto level(const Level&) const noexcept { static_assert(has_level>); @@ -739,10 +764,9 @@ constexpr auto _CCCL_HOST get_launch_dimensions(const hierarchy_dimensions -constexpr auto make_hierarchy_fragment(L1&& l1, Levels&&... ls) noexcept +constexpr auto make_hierarchy_fragment(L1 l1, Levels... ls) noexcept { - return detail::make_hierarchy_fragment_reversable( - ::cuda::std::forward(l1), ::cuda::std::forward(ls)...); + return detail::__make_hierarchy_fragment()(detail::__as_level(l1), detail::__as_level(ls)...); } /** @@ -767,45 +791,44 @@ constexpr auto make_hierarchy_fragment(L1&& l1, Levels&&... ls) noexcept * @par */ template -constexpr auto make_hierarchy(L1&& l1, Levels&&... ls) noexcept +constexpr auto make_hierarchy(L1 l1, Levels... ls) noexcept { - return detail::make_hierarchy_fragment_reversable( - ::cuda::std::forward(l1), ::cuda::std::forward(ls)...); + return detail::__make_hierarchy_fragment()(detail::__as_level(l1), detail::__as_level(ls)...); } -// We can consider removing the operator, but its convinient for in-line construction +// We can consider removing the operator&, but its convenient for in-line construction // TODO accept forwarding references -template -_CCCL_HOST_DEVICE constexpr auto -operator&(const hierarchy_dimensions_fragment& ls, const L1& l1) noexcept +template +_CUDAX_API constexpr auto operator&(const hierarchy_dimensions_fragment& ls, LNew lnew) noexcept { - using top_level = typename detail::level_at_index<0, Levels...>::level_type; - using bottom_level = typename detail::level_at_index::level_type; + auto new_level = detail::__as_level(lnew); + using NewLevel = decltype(new_level); + using top_level = __level_type_of<::cuda::std::__type_index_c<0, Levels...>>; + using bottom_level = __level_type_of<::cuda::std::__type_index_c>; - if constexpr (detail::can_stack_on_top) + if constexpr (detail::can_stack_on_top>) { - return hierarchy_dimensions_fragment( - ::cuda::std::tuple_cat(::cuda::std::make_tuple(l1), ls.levels)); + return hierarchy_dimensions_fragment( + ::cuda::std::tuple_cat(::cuda::std::make_tuple(new_level), ls.levels)); } else { - static_assert(detail::can_stack_on_top, + static_assert(detail::can_stack_on_top<__level_type_of, bottom_level>, "Not supported order of levels in hierarchy"); - using NewUnit = typename L1::level_type::allowed_below::default_unit; - return hierarchy_dimensions_fragment( - ::cuda::std::tuple_cat(ls.levels, ::cuda::std::make_tuple(l1))); + using NewUnit = typename __level_type_of::allowed_below::default_unit; + return hierarchy_dimensions_fragment( + ::cuda::std::tuple_cat(ls.levels, ::cuda::std::make_tuple(new_level))); } } template -_CCCL_HOST_DEVICE constexpr auto -operator&(const L1& l1, const hierarchy_dimensions_fragment& ls) noexcept +_CUDAX_API constexpr auto operator&(L1 l1, const hierarchy_dimensions_fragment& ls) noexcept { return ls & l1; } template -_CCCL_HOST_DEVICE constexpr auto +_CUDAX_API constexpr auto operator&(const level_dimensions& l1, const level_dimensions& l2) noexcept { return hierarchy_dimensions>(l1) & l2; @@ -834,9 +857,9 @@ operator&(const level_dimensions& l1, const level_dimensions -constexpr auto hierarchy_add_level(const hierarchy_dimensions_fragment& hierarchy, NewLevel&& level) +constexpr auto hierarchy_add_level(const hierarchy_dimensions_fragment& hierarchy, NewLevel level) { - return hierarchy & ::cuda::std::forward(level); + return hierarchy & level; } /** diff --git a/cudax/include/cuda/experimental/__hierarchy/hierarchy_levels.cuh b/cudax/include/cuda/experimental/__hierarchy/hierarchy_levels.cuh index 67fb9f05006..bec5095163a 100644 --- a/cudax/include/cuda/experimental/__hierarchy/hierarchy_levels.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/hierarchy_levels.cuh @@ -11,6 +11,8 @@ #ifndef _CUDAX__HIERARCHY_HIERARCHY_LEVELS #define _CUDAX__HIERARCHY_HIERARCHY_LEVELS +#include + #include #include @@ -66,12 +68,6 @@ struct dimensions_query return hierarchy::extents(); } }; - -template -struct get_first_level_type -{ - using type = L1; -}; } // namespace detail // Struct to represent levels allowed below or above a certain level, @@ -79,13 +75,7 @@ struct get_first_level_type template struct allowed_levels { - using default_unit = typename detail::get_first_level_type::type; -}; - -template <> -struct allowed_levels<> -{ - using default_unit = void; + using default_unit = ::cuda::std::__type_index_c<0, Levels..., void>; }; namespace detail diff --git a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh index 69bfd88e24a..3f99c30eaa5 100644 --- a/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh +++ b/cudax/include/cuda/experimental/__hierarchy/level_dimensions.cuh @@ -11,8 +11,10 @@ #ifndef _CUDAX__HIERARCHY_LEVEL_DIMENSIONS #define _CUDAX__HIERARCHY_LEVEL_DIMENSIONS +#include #include +#include #include #if _CCCL_STD_VER >= 2017 @@ -126,6 +128,22 @@ struct level_dimensions {} _CCCL_HOST_DEVICE constexpr level_dimensions() : dims(){}; + +# if defined(__cpp_three_way_comparison) && __cpp_three_way_comparison >= 201907 + _CCCL_NODISCARD _CUDAX_API constexpr bool operator==(const level_dimensions&) const noexcept = default; +# else + _CCCL_NODISCARD_FRIEND _CUDAX_API constexpr bool + operator==(const level_dimensions& left, const level_dimensions& right) noexcept + { + return left.dims == right.dims; + } + + _CCCL_NODISCARD_FRIEND _CUDAX_API constexpr bool + operator!=(const level_dimensions& left, const level_dimensions& right) noexcept + { + return left.dims != right.dims; + } +# endif }; /** diff --git a/cudax/test/hierarchy/hierarchy_custom_types.cu b/cudax/test/hierarchy/hierarchy_custom_types.cu index f35a4914ced..4c7fc042bdf 100644 --- a/cudax/test/hierarchy/hierarchy_custom_types.cu +++ b/cudax/test/hierarchy/hierarchy_custom_types.cu @@ -70,34 +70,3 @@ TEST_CASE("Custom level", "[hierarchy]") { custom_level_test().run(); } - -template -struct level_disabled_copy : public cudax::level_dimensions -{ - constexpr __host__ __device__ level_disabled_copy(const Dims& d) - : cudax::level_dimensions(d) - {} - - constexpr level_disabled_copy(const level_disabled_copy& d) = delete; - constexpr level_disabled_copy(level_disabled_copy&& d) = default; -}; - -TEST_CASE("Disabled lvalue copy", "hierarchy") -{ - auto ext = cuda::std::extents(64, 1, 1); - auto ext_static = cuda::std::extents(); - auto block_dims = level_disabled_copy(ext); - auto block_dims2 = level_disabled_copy(ext); - auto block_dims_static = level_disabled_copy(ext_static); - - auto hierarchy = cudax::make_hierarchy(cudax::grid_dims(256), std::move(block_dims)); - auto hierarchy_rev = cudax::make_hierarchy(std::move(block_dims2), cudax::grid_dims(256)); - static_assert(std::is_same_v); - - CUDAX_REQUIRE(hierarchy.count() == 256 * 64); - CUDAX_REQUIRE(hierarchy_rev.count() == 256 * 64); - - auto hierarchy_static = cudax::make_hierarchy(std::move(block_dims_static), cudax::grid_dims(256)); - - static_assert(hierarchy_static.count(cudax::thread, cudax::block) == 64); -} diff --git a/cudax/test/hierarchy/hierarchy_smoke.cu b/cudax/test/hierarchy/hierarchy_smoke.cu index fc78ca45049..582e745ce3c 100644 --- a/cudax/test/hierarchy/hierarchy_smoke.cu +++ b/cudax/test/hierarchy/hierarchy_smoke.cu @@ -62,6 +62,10 @@ struct basic_test_single_dim static_assert(dimensions_dyn.static_count(cudax::thread, cudax::block) == cuda::std::dynamic_extent); static_assert(dimensions_dyn.static_count(cudax::thread, cudax::grid) == cuda::std::dynamic_extent); + + // Test that we can also drop the empty parens in the level constructors: + auto dimensions2 = cudax::make_hierarchy(cudax::block_dims, cudax::grid_dims); + CUDAX_REQUIRE(dimensions == dimensions2); } }; diff --git a/docs/repo.toml b/docs/repo.toml index 2bc5748922d..43d41855ca6 100644 --- a/docs/repo.toml +++ b/docs/repo.toml @@ -426,6 +426,13 @@ doxygen_predefined = [ "_CCCL_DIAG_SUPPRESS_ICC(x)=", "_CCCL_DIAG_SUPPRESS_MSVC(x)=", "_CCCL_DIAG_SUPPRESS_NVHPC(x)=", + "_CUDAX_API", + "_CUDAX_HOST_API", + "_CUDAX_DEVICE_API", + "_CUDAX_TRIVIAL_API", + "_CUDAX_TRIVIAL_HOST_API", + "_CUDAX_TRIVIAL_DEVICE_API", + "_CUDAX_PUBLIC_API", "_LIBCUDACXX_AND=&&", "_LIBCUDACXX_EAT_REST(x)=", "_LIBCUDACXX_GLOBAL_CONSTANT=inline",