diff --git a/.clang-format b/.clang-format index 93054f09ab7..5e9229b265d 100644 --- a/.clang-format +++ b/.clang-format @@ -86,7 +86,7 @@ PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Right +PointerAlignment: Left ReflowComments: true SortIncludes: true SortUsingDeclarations: true diff --git a/accessor/accessor_helper.hpp b/accessor/accessor_helper.hpp index 1d90004d8f8..06fb8cd9738 100644 --- a/accessor/accessor_helper.hpp +++ b/accessor/accessor_helper.hpp @@ -73,10 +73,10 @@ struct row_major_helper_s { template static constexpr GKO_ACC_ATTRIBUTES ValueType - compute(const std::array &size, - const std::array 1 ? total_dim - 1 : 0)> - &stride, - FirstType first, Indices &&... idxs) + compute(const std::array& size, + const std::array 1 ? total_dim - 1 : 0)>& + stride, + FirstType first, Indices&&... idxs) { // The ASSERT size check must NOT be indexed with `dim_idx` directy, // otherwise, it leads to a linker error. The reason is likely that @@ -97,7 +97,7 @@ template struct row_major_helper_s { template static constexpr GKO_ACC_ATTRIBUTES ValueType - compute(const std::array &size, + compute(const std::array& size, const std::array 1 ? total_dim - 1 : 0)>, FirstType first) { @@ -109,7 +109,7 @@ struct row_major_helper_s { template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t -mult_dim_upwards(const std::array &) +mult_dim_upwards(const std::array&) { return 1; } @@ -117,7 +117,7 @@ mult_dim_upwards(const std::array &) template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N), ValueType> -mult_dim_upwards(const std::array &size) +mult_dim_upwards(const std::array& size) { return size[iter] * mult_dim_upwards(size); } @@ -128,8 +128,8 @@ template > - compute_default_row_major_stride_array(const std::array &, - Args &&... args) + compute_default_row_major_stride_array(const std::array&, + Args&&... 
args) { return {{std::forward(args)...}}; } @@ -138,8 +138,8 @@ template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t< (iter < N) && (iter == sizeof...(Args) + 1), std::array> -compute_default_row_major_stride_array(const std::array &size, - Args &&... args) +compute_default_row_major_stride_array(const std::array& size, + Args&&... args) { return compute_default_row_major_stride_array( size, std::forward(args)..., @@ -157,9 +157,9 @@ compute_default_row_major_stride_array(const std::array &size, template constexpr GKO_ACC_ATTRIBUTES ValueType compute_row_major_index( - const std::array &size, - const std::array 1 ? total_dim - 1 : 0)> &stride, - Indices &&... idxs) + const std::array& size, + const std::array 1 ? total_dim - 1 : 0)>& stride, + Indices&&... idxs) { return detail::row_major_helper_s< ValueType, total_dim, DimensionType>::compute(size, stride, @@ -188,7 +188,7 @@ template constexpr GKO_ACC_ATTRIBUTES std::array 0 ? dimensions - 1 : 0)> compute_default_row_major_stride_array( - const std::array &size) + const std::array& size) { return detail::compute_default_row_major_stride_array(size); } @@ -218,7 +218,7 @@ template (mask &( + ? static_cast(mask&( size_type{1} << (total_dim - 1 - dim_idx))) : false> struct row_major_masked_helper_s {}; @@ -242,8 +242,8 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES std::array - build_stride(const std::array &size, - Args &&... args) + build_stride(const std::array& size, + Args&&... args) { return row_major_masked_helper_s< ValueType, mask, set_bits_processed + 1, stride_size, dim_idx + 1, @@ -253,7 +253,7 @@ struct row_major_masked_helper_s &size) + mult_size_upwards(const std::array& size) { return size[dim_idx] * row_major_masked_helper_s< ValueType, mask, set_bits_processed + 1, @@ -263,9 +263,9 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_mask_idx(const std::array &size, - const std::array &stride, - First first, Indices &&... 
idxs) + compute_mask_idx(const std::array& size, + const std::array& stride, + First first, Indices&&... idxs) { static_assert(sizeof...(Indices) + 1 == total_dim - dim_idx, "Mismatching number of Idxs!"); @@ -284,9 +284,9 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_direct_idx(const std::array &size, - const std::array &stride, - First first, Indices &&... idxs) + compute_direct_idx(const std::array& size, + const std::array& stride, + First first, Indices&&... idxs) { static_assert(sizeof...(Indices) == stride_size - set_bits_processed, "Mismatching number of Idxs!"); @@ -320,8 +320,8 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES std::array - build_stride(const std::array &size, - Args &&... args) + build_stride(const std::array& size, + Args&&... args) { return row_major_masked_helper_s< ValueType, mask, set_bits_processed + 1, stride_size, dim_idx + 1, @@ -331,7 +331,7 @@ struct row_major_masked_helper_s &size) + mult_size_upwards(const std::array& size) { return row_major_masked_helper_s< ValueType, mask, set_bits_processed + 1, stride_size, dim_idx + 1, @@ -340,9 +340,9 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_mask_idx(const std::array &size, - const std::array &stride, - First first, Indices &&... idxs) + compute_mask_idx(const std::array& size, + const std::array& stride, + First first, Indices&&... idxs) { static_assert(sizeof...(Indices) + 1 == total_dim - dim_idx, "Mismatching number of Idxs!"); @@ -361,9 +361,9 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_direct_idx(const std::array &size, - const std::array &stride, - First first, Indices &&... idxs) + compute_direct_idx(const std::array& size, + const std::array& stride, + First first, Indices&&... 
idxs) { static_assert(sizeof...(Indices) == stride_size - set_bits_processed, "Mismatching number of Idxs!"); @@ -398,8 +398,8 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES std::array - build_stride(const std::array &size, - Args &&... args) + build_stride(const std::array& size, + Args&&... args) { return row_major_masked_helper_s< ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, @@ -409,7 +409,7 @@ struct row_major_masked_helper_s &size) + mult_size_upwards(const std::array& size) { return row_major_masked_helper_s< ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, @@ -418,9 +418,9 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_mask_idx(const std::array &size, - const std::array &stride, First, - Indices &&... idxs) + compute_mask_idx(const std::array& size, + const std::array& stride, First, + Indices&&... idxs) { static_assert(sizeof...(Indices) + 1 == total_dim - dim_idx, "Mismatching number of Idxs!"); @@ -433,8 +433,8 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES ValueType compute_direct_idx( - const std::array &size, - const std::array &stride, Indices &&... idxs) + const std::array& size, + const std::array& stride, Indices&&... idxs) { return row_major_masked_helper_s< ValueType, mask, set_bits_processed, stride_size, dim_idx + 1, @@ -456,25 +456,25 @@ struct row_major_masked_helper_s static constexpr GKO_ACC_ATTRIBUTES std::array - build_stride(const std::array &, Args &&... args) + build_stride(const std::array&, Args&&... 
args) { return {{std::forward(args)...}}; } static constexpr GKO_ACC_ATTRIBUTES ValueType - mult_size_upwards(const std::array &) + mult_size_upwards(const std::array&) { return 1; } static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_mask_idx(const std::array &, - const std::array &) + compute_mask_idx(const std::array&, + const std::array&) { return 0; } static constexpr GKO_ACC_ATTRIBUTES ValueType - compute_direct_idx(const std::array &, - const std::array &) + compute_direct_idx(const std::array&, + const std::array&) { return 0; } @@ -491,8 +491,8 @@ struct row_major_masked_helper_s constexpr GKO_ACC_ATTRIBUTES auto compute_masked_index( - const std::array &size, - const std::array &stride, Indices &&... idxs) + const std::array& size, + const std::array& stride, Indices&&... idxs) { return detail::row_major_masked_helper_s< ValueType, mask, 0, stride_size, 0, total_dim, @@ -507,8 +507,8 @@ constexpr GKO_ACC_ATTRIBUTES auto compute_masked_index( template constexpr GKO_ACC_ATTRIBUTES auto compute_masked_index_direct( - const std::array &size, - const std::array &stride, Indices &&... idxs) + const std::array& size, + const std::array& stride, Indices&&... idxs) { return detail::row_major_masked_helper_s< ValueType, mask, 0, stride_size, 0, total_dim, @@ -525,7 +525,7 @@ constexpr GKO_ACC_ATTRIBUTES auto compute_masked_index_direct( template constexpr GKO_ACC_ATTRIBUTES auto compute_default_masked_row_major_stride_array( - const std::array &size) + const std::array& size) { return detail::row_major_masked_helper_s GKO_ACC_ATTRIBUTES std::enable_if_t multidim_for_each_impl( - const std::array &, Callable callable, - Indices &&... indices) + const std::array&, Callable callable, + Indices&&... 
indices) { static_assert(iter == sizeof...(Indices), "Number arguments must match current iteration!"); @@ -580,7 +580,7 @@ GKO_ACC_ATTRIBUTES std::enable_if_t multidim_for_each_impl( template GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N)> multidim_for_each_impl( - const std::array &size, Callable callable, + const std::array& size, Callable callable, Indices... indices) { static_assert(iter == sizeof...(Indices), @@ -600,7 +600,7 @@ GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N)> multidim_for_each_impl( */ template GKO_ACC_ATTRIBUTES void multidim_for_each( - const std::array &size, Callable &&callable) + const std::array& size, Callable&& callable) { detail::multidim_for_each_impl<0>(size, std::forward(callable)); } @@ -611,7 +611,7 @@ namespace detail { template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t -index_spans_in_size(const std::array &) +index_spans_in_size(const std::array&) { return 0; } @@ -619,8 +619,8 @@ index_spans_in_size(const std::array &) template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N), int> -index_spans_in_size(const std::array &size, First first, - Remaining &&... remaining) +index_spans_in_size(const std::array& size, First first, + Remaining&&... remaining) { static_assert(sizeof...(Remaining) + 1 == N - iter, "Number of remaining spans must be equal to N - iter"); @@ -636,7 +636,7 @@ index_spans_in_size(const std::array &size, First first, template constexpr GKO_ACC_ATTRIBUTES int validate_index_spans( - const std::array &size, Spans &&... spans) + const std::array& size, Spans&&... spans) { return detail::index_spans_in_size<0>(size, std::forward(spans)...); } @@ -697,10 +697,10 @@ struct index_helper_s { template static constexpr GKO_ACC_ATTRIBUTES ValueType - compute(const std::array &size, - const std::array 0 ? total_dim - 1 : 0)> - &stride, - FirstType first, Indices &&... idxs) + compute(const std::array& size, + const std::array 0 ? total_dim - 1 : 0)>& + stride, + FirstType first, Indices&&... 
idxs) { if (current_iter == total_dim - 1) { return GKO_ACC_ASSERT(first < size[dim_idx]), @@ -725,9 +725,9 @@ struct index_helper_s { template static constexpr GKO_ACC_ATTRIBUTES ValueType - compute(const std::array &size, - const std::array 1 ? total_dim - 1 : 0)> - &stride, + compute(const std::array& size, + const std::array 1 ? total_dim - 1 : 0)>& + stride, FirstType first) { return GKO_ACC_ASSERT(first < size[total_dim - 1]), @@ -744,9 +744,9 @@ struct index_helper_s { */ template constexpr GKO_ACC_ATTRIBUTES ValueType compute_index( - const std::array &size, - const std::array 0 ? total_dim - 1 : 0)> &stride, - Indices &&... idxs) + const std::array& size, + const std::array 0 ? total_dim - 1 : 0)>& stride, + Indices&&... idxs) { return index_helper_s::compute( size, stride, std::forward(idxs)...); @@ -757,8 +757,8 @@ template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter == N - 1) && (iter == sizeof...(Args) + 1), std::array> - default_stride_array_impl(const std::array &size, - Args &&... args) + default_stride_array_impl(const std::array& size, + Args&&... args) { return {{std::forward(args)..., size[N - 2]}}; } @@ -767,7 +767,7 @@ template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t<(iter < N - 1 || iter == N) && (iter == sizeof...(Args) + 1), std::array> -default_stride_array_impl(const std::array &size, Args &&... args) +default_stride_array_impl(const std::array& size, Args&&... args) { return default_stride_array_impl( size, std::forward(args)..., @@ -777,7 +777,7 @@ default_stride_array_impl(const std::array &size, Args &&... args) template constexpr GKO_ACC_ATTRIBUTES std::array 0 ? 
dimensions - 1 : 0)> - default_stride_array(const std::array &size) + default_stride_array(const std::array& size) { return default_stride_array_impl(size); } diff --git a/accessor/accessor_references.hpp b/accessor/accessor_references.hpp index 19c2ddbc49a..51537d5658d 100644 --- a/accessor/accessor_references.hpp +++ b/accessor/accessor_references.hpp @@ -89,7 +89,7 @@ struct has_cast_operator< template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t::value, ValueType> - to_value_type(const Ref &ref) + to_value_type(const Ref& ref) { return ref.Ref::operator ValueType(); } @@ -97,7 +97,7 @@ constexpr GKO_ACC_ATTRIBUTES template constexpr GKO_ACC_ATTRIBUTES std::enable_if_t::value, ValueType> - to_value_type(const Ref &ref) + to_value_type(const Ref& ref) { return static_cast(ref); } @@ -126,20 +126,20 @@ struct enable_reference_operators { #define GKO_ACC_REFERENCE_BINARY_OPERATOR_OVERLOAD(_op) \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES arithmetic_type \ - operator _op(const Reference &ref1, const Reference &ref2) \ + operator _op(const Reference& ref1, const Reference& ref2) \ { \ return to_value_type(ref1) \ _op to_value_type(ref2); \ } \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES arithmetic_type \ - operator _op(const Reference &ref, const arithmetic_type &a) \ + operator _op(const Reference& ref, const arithmetic_type& a) \ { \ return to_value_type(ref) _op a; \ } \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES arithmetic_type \ - operator _op(const arithmetic_type &a, const Reference &ref) \ + operator _op(const arithmetic_type& a, const Reference& ref) \ { \ return a _op to_value_type(ref); \ } @@ -153,14 +153,14 @@ struct enable_reference_operators { #define GKO_ACC_REFERENCE_ASSIGNMENT_OPERATOR_OVERLOAD(_oper, _op) \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES arithmetic_type \ - _oper(Reference &&ref1, const Reference 
&ref2) \ + _oper(Reference&& ref1, const Reference& ref2) \ { \ return std::move(ref1) = to_value_type(ref1) \ _op to_value_type(ref2); \ } \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES arithmetic_type \ - _oper(Reference &&ref, const arithmetic_type &a) \ + _oper(Reference&& ref, const arithmetic_type& a) \ { \ return std::move(ref) = to_value_type(ref) _op a; \ } @@ -174,20 +174,20 @@ struct enable_reference_operators { #define GKO_ACC_REFERENCE_COMPARISON_OPERATOR_OVERLOAD(_op) \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES bool \ - operator _op(const Reference &ref1, const Reference &ref2) \ + operator _op(const Reference& ref1, const Reference& ref2) \ { \ return to_value_type(ref1) \ _op to_value_type(ref2); \ } \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES bool \ - operator _op(const Reference &ref, const arithmetic_type &a) \ + operator _op(const Reference& ref, const arithmetic_type& a) \ { \ return to_value_type(ref) _op a; \ } \ friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE \ GKO_ACC_ATTRIBUTES bool \ - operator _op(const arithmetic_type &a, const Reference &ref) \ + operator _op(const arithmetic_type& a, const Reference& ref) \ { \ return a _op to_value_type(ref); \ } @@ -197,7 +197,7 @@ struct enable_reference_operators { friend GKO_ACC_ENABLE_REFERENCE_CONSTEXPR GKO_ACC_INLINE GKO_ACC_ATTRIBUTES arithmetic_type - operator-(const Reference &ref) + operator-(const Reference& ref) { return -to_value_type(ref); } @@ -244,50 +244,52 @@ class reduced_storage // Allow move construction, so perfect forwarding is possible (required // for `range` support) - reduced_storage(reduced_storage &&) = default; + reduced_storage(reduced_storage&&) = default; reduced_storage() = delete; ~reduced_storage() = default; // Forbid copy construction - reduced_storage(const reduced_storage &) = delete; + reduced_storage(const reduced_storage&) = delete; constexpr 
explicit GKO_ACC_ATTRIBUTES reduced_storage( - storage_type *const GKO_ACC_RESTRICT ptr) + storage_type* const GKO_ACC_RESTRICT ptr) : ptr_{ptr} {} constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const { - const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + const storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_; return static_cast(*r_ptr); } constexpr GKO_ACC_ATTRIBUTES arithmetic_type - operator=(arithmetic_type val) &&noexcept + operator=(arithmetic_type val) && + noexcept { - storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_; *r_ptr = static_cast(val); return val; } constexpr GKO_ACC_ATTRIBUTES arithmetic_type - operator=(const reduced_storage &ref) && + operator=(const reduced_storage& ref) && { std::move(*this) = static_cast(ref); return static_cast(*this); } constexpr GKO_ACC_ATTRIBUTES arithmetic_type - operator=(reduced_storage &&ref) &&noexcept + operator=(reduced_storage&& ref) && + noexcept { std::move(*this) = static_cast(ref); return static_cast(*this); } private: - storage_type *const GKO_ACC_RESTRICT ptr_; + storage_type* const GKO_ACC_RESTRICT ptr_; }; // Specialization for const storage_type to prevent `operator=` @@ -300,36 +302,36 @@ class reduced_storage using storage_type = const StorageType; // Allow move construction, so perfect forwarding is possible - reduced_storage(reduced_storage &&) = default; + reduced_storage(reduced_storage&&) = default; reduced_storage() = delete; ~reduced_storage() = default; // Forbid copy construction and move assignment - reduced_storage(const reduced_storage &) = delete; + reduced_storage(const reduced_storage&) = delete; - reduced_storage &operator=(reduced_storage &&) = delete; + reduced_storage& operator=(reduced_storage&&) = delete; constexpr explicit GKO_ACC_ATTRIBUTES reduced_storage( - storage_type *const GKO_ACC_RESTRICT ptr) + storage_type* const GKO_ACC_RESTRICT ptr) : ptr_{ptr} {} constexpr GKO_ACC_ATTRIBUTES operator 
arithmetic_type() const { - const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + const storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_; return static_cast(*r_ptr); } private: - storage_type *const GKO_ACC_RESTRICT ptr_; + storage_type* const GKO_ACC_RESTRICT ptr_; }; template constexpr remove_complex_t abs( - const reduced_storage &ref) + const reduced_storage& ref) { using std::abs; return abs(static_cast(ref)); @@ -364,50 +366,52 @@ class scaled_reduced_storage using storage_type = StorageType; // Allow move construction, so perfect forwarding is possible - scaled_reduced_storage(scaled_reduced_storage &&) = default; + scaled_reduced_storage(scaled_reduced_storage&&) = default; scaled_reduced_storage() = delete; ~scaled_reduced_storage() = default; // Forbid copy construction - scaled_reduced_storage(const scaled_reduced_storage &) = delete; + scaled_reduced_storage(const scaled_reduced_storage&) = delete; constexpr explicit GKO_ACC_ATTRIBUTES scaled_reduced_storage( - storage_type *const GKO_ACC_RESTRICT ptr, arithmetic_type scalar) + storage_type* const GKO_ACC_RESTRICT ptr, arithmetic_type scalar) : ptr_{ptr}, scalar_{scalar} {} constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const { - const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + const storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_; return static_cast(*r_ptr) * scalar_; } constexpr GKO_ACC_ATTRIBUTES arithmetic_type - operator=(arithmetic_type val) &&noexcept + operator=(arithmetic_type val) && + noexcept { - storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_; *r_ptr = static_cast(val / scalar_); return val; } constexpr GKO_ACC_ATTRIBUTES arithmetic_type - operator=(const scaled_reduced_storage &ref) && + operator=(const scaled_reduced_storage& ref) && { std::move(*this) = static_cast(ref); return static_cast(*this); } constexpr GKO_ACC_ATTRIBUTES arithmetic_type - operator=(scaled_reduced_storage &&ref) &&noexcept + 
operator=(scaled_reduced_storage&& ref) && + noexcept { std::move(*this) = static_cast(ref); return static_cast(*this); } private: - storage_type *const GKO_ACC_RESTRICT ptr_; + storage_type* const GKO_ACC_RESTRICT ptr_; const arithmetic_type scalar_; }; @@ -422,37 +426,37 @@ class scaled_reduced_storage using storage_type = const StorageType; // Allow move construction, so perfect forwarding is possible - scaled_reduced_storage(scaled_reduced_storage &&) = default; + scaled_reduced_storage(scaled_reduced_storage&&) = default; scaled_reduced_storage() = delete; ~scaled_reduced_storage() = default; // Forbid copy construction and move assignment - scaled_reduced_storage(const scaled_reduced_storage &) = delete; + scaled_reduced_storage(const scaled_reduced_storage&) = delete; - scaled_reduced_storage &operator=(scaled_reduced_storage &&) = delete; + scaled_reduced_storage& operator=(scaled_reduced_storage&&) = delete; constexpr explicit GKO_ACC_ATTRIBUTES scaled_reduced_storage( - storage_type *const GKO_ACC_RESTRICT ptr, arithmetic_type scalar) + storage_type* const GKO_ACC_RESTRICT ptr, arithmetic_type scalar) : ptr_{ptr}, scalar_{scalar} {} constexpr GKO_ACC_ATTRIBUTES operator arithmetic_type() const { - const storage_type *const GKO_ACC_RESTRICT r_ptr = ptr_; + const storage_type* const GKO_ACC_RESTRICT r_ptr = ptr_; return static_cast(*r_ptr) * scalar_; } private: - storage_type *const GKO_ACC_RESTRICT ptr_; + storage_type* const GKO_ACC_RESTRICT ptr_; const arithmetic_type scalar_; }; template constexpr remove_complex_t abs( - const scaled_reduced_storage &ref) + const scaled_reduced_storage& ref) { using std::abs; return abs(static_cast(ref)); diff --git a/accessor/block_col_major.hpp b/accessor/block_col_major.hpp index 6495180ba2f..9ad8c107939 100644 --- a/accessor/block_col_major.hpp +++ b/accessor/block_col_major.hpp @@ -82,7 +82,7 @@ class block_col_major { /** * Type of underlying data storage. 
*/ - using data_type = value_type *; + using data_type = value_type*; using const_accessor = block_col_major; using stride_type = std::array; @@ -142,8 +142,8 @@ class block_col_major { */ template constexpr GKO_ACC_ATTRIBUTES - std::enable_if_t::value, value_type &> - operator()(Indices &&... indices) const + std::enable_if_t::value, value_type&> + operator()(Indices&&... indices) const { return data[helper::blk_col_major::compute_index( lengths, stride, std::forward(indices)...)]; diff --git a/accessor/index_span.hpp b/accessor/index_span.hpp index 6feb6007dad..c294fb0a701 100644 --- a/accessor/index_span.hpp +++ b/accessor/index_span.hpp @@ -89,38 +89,38 @@ struct index_span { */ GKO_ACC_ATTRIBUTES constexpr bool is_valid() const { return begin < end; } - friend GKO_ACC_ATTRIBUTES constexpr bool operator<(const index_span &first, - const index_span &second) + friend GKO_ACC_ATTRIBUTES constexpr bool operator<(const index_span& first, + const index_span& second) { return first.end < second.begin; } friend GKO_ACC_ATTRIBUTES constexpr bool operator<=( - const index_span &first, const index_span &second) + const index_span& first, const index_span& second) { return first.end <= second.begin; } - friend GKO_ACC_ATTRIBUTES constexpr bool operator>(const index_span &first, - const index_span &second) + friend GKO_ACC_ATTRIBUTES constexpr bool operator>(const index_span& first, + const index_span& second) { return second < first; } friend GKO_ACC_ATTRIBUTES constexpr bool operator>=( - const index_span &first, const index_span &second) + const index_span& first, const index_span& second) { return second <= first; } friend GKO_ACC_ATTRIBUTES constexpr bool operator==( - const index_span &first, const index_span &second) + const index_span& first, const index_span& second) { return first.begin == second.begin && first.end == second.end; } friend GKO_ACC_ATTRIBUTES constexpr bool operator!=( - const index_span &first, const index_span &second) + const index_span& first, 
const index_span& second) { return !(first == second); } diff --git a/accessor/range.hpp b/accessor/range.hpp index ee3eb59c763..628d0962257 100644 --- a/accessor/range.hpp +++ b/accessor/range.hpp @@ -97,7 +97,7 @@ class range { std::enable_if_t< !check_if_same...>::value, int> = 0> - GKO_ACC_ATTRIBUTES constexpr explicit range(AccessorParams &&... args) + GKO_ACC_ATTRIBUTES constexpr explicit range(AccessorParams&&... args) : accessor_{std::forward(args)...} {} @@ -117,9 +117,8 @@ class range { * given index_spans. */ template - GKO_ACC_ATTRIBUTES constexpr auto operator()( - DimensionTypes &&... dimensions) const - -> decltype(std::declval()( + GKO_ACC_ATTRIBUTES constexpr auto operator()(DimensionTypes&&... dimensions) + const -> decltype(std::declval()( std::forward(dimensions)...)) { static_assert(sizeof...(dimensions) <= dimensionality, @@ -146,7 +145,7 @@ class range { * * @return pointer to the accessor */ - GKO_ACC_ATTRIBUTES constexpr const accessor *operator->() const noexcept + GKO_ACC_ATTRIBUTES constexpr const accessor* operator->() const noexcept { return &accessor_; } @@ -156,7 +155,7 @@ class range { * * @return reference to the accessor */ - GKO_ACC_ATTRIBUTES constexpr const accessor &get_accessor() const noexcept + GKO_ACC_ATTRIBUTES constexpr const accessor& get_accessor() const noexcept { return accessor_; } diff --git a/accessor/reduced_row_major.hpp b/accessor/reduced_row_major.hpp index 2d271fbd340..5cbb05fca06 100644 --- a/accessor/reduced_row_major.hpp +++ b/accessor/reduced_row_major.hpp @@ -105,7 +105,7 @@ class reduced_row_major { * @param stride stride array used for memory accesses */ constexpr GKO_ACC_ATTRIBUTES reduced_row_major(dim_type size, - storage_type *storage, + storage_type* storage, storage_stride_type stride) : size_(size), storage_{storage}, stride_(stride) {} @@ -121,8 +121,8 @@ class reduced_row_major { */ template constexpr GKO_ACC_ATTRIBUTES reduced_row_major(dim_type size, - storage_type *storage, - Strides &&... 
strides) + storage_type* storage, + Strides&&... strides) : reduced_row_major{ size, storage, storage_stride_type{{std::forward(strides)...}}} @@ -139,7 +139,7 @@ class reduced_row_major { * @param size multidimensional size of the memory */ constexpr GKO_ACC_ATTRIBUTES reduced_row_major(dim_type size, - storage_type *storage) + storage_type* storage) : reduced_row_major{ size, storage, helper::compute_default_row_major_stride_array(size)} @@ -189,7 +189,7 @@ class reduced_row_major { constexpr GKO_ACC_ATTRIBUTES std::enable_if_t< are_all_integral::value, std::conditional_t> - operator()(Indices &&... indices) const + operator()(Indices&&... indices) const { return reference_type{storage_ + compute_index(std::forward(indices)...)}; @@ -229,14 +229,14 @@ class reduced_row_major { * @returns returns a pointer to a stride array of size dimensionality - 1 */ GKO_ACC_ATTRIBUTES - constexpr const storage_stride_type &get_stride() const { return stride_; } + constexpr const storage_stride_type& get_stride() const { return stride_; } /** * Returns the pointer to the storage data * * @returns the pointer to the storage data */ - constexpr GKO_ACC_ATTRIBUTES storage_type *get_stored_data() const + constexpr GKO_ACC_ATTRIBUTES storage_type* get_stored_data() const { return storage_; } @@ -246,7 +246,7 @@ class reduced_row_major { * * @returns a const pointer to the storage data */ - constexpr GKO_ACC_ATTRIBUTES const storage_type *get_const_storage() const + constexpr GKO_ACC_ATTRIBUTES const storage_type* get_const_storage() const { return storage_; } @@ -254,7 +254,7 @@ class reduced_row_major { protected: template constexpr GKO_ACC_ATTRIBUTES size_type - compute_index(Indices &&... indices) const + compute_index(Indices&&... 
indices) const { static_assert(sizeof...(Indices) == dimensionality, "Number of indices must match dimensionality!"); @@ -264,7 +264,7 @@ class reduced_row_major { private: const dim_type size_; - storage_type *const storage_; + storage_type* const storage_; const storage_stride_type stride_; }; diff --git a/accessor/row_major.hpp b/accessor/row_major.hpp index 3aed9fe685f..2377e3d2bf6 100644 --- a/accessor/row_major.hpp +++ b/accessor/row_major.hpp @@ -80,7 +80,7 @@ class row_major { /** * Type of underlying data storage. */ - using data_type = value_type *; + using data_type = value_type*; using const_accessor = row_major; using length_type = std::array; @@ -140,8 +140,8 @@ class row_major { */ template constexpr GKO_ACC_ATTRIBUTES - std::enable_if_t::value, value_type &> - operator()(Indices &&... indices) const + std::enable_if_t::value, value_type&> + operator()(Indices&&... indices) const { return data[helper::compute_row_major_index( lengths, stride, std::forward(indices)...)]; diff --git a/accessor/scaled_reduced_row_major.hpp b/accessor/scaled_reduced_row_major.hpp index 356a116fb0a..afbc97e4a01 100644 --- a/accessor/scaled_reduced_row_major.hpp +++ b/accessor/scaled_reduced_row_major.hpp @@ -85,11 +85,11 @@ struct enable_write_scalar { */ template constexpr GKO_ACC_ATTRIBUTES scalar_type - write_scalar_masked(scalar_type value, Indices &&... indices) const + write_scalar_masked(scalar_type value, Indices&&... indices) const { static_assert(sizeof...(Indices) == Dimensionality, "Number of indices must match dimensionality!"); - scalar_type *GKO_ACC_RESTRICT rest_scalar = self()->scalar_; + scalar_type* GKO_ACC_RESTRICT rest_scalar = self()->scalar_; return rest_scalar[self()->compute_mask_scalar_index( std::forward(indices)...)] = value; } @@ -107,18 +107,18 @@ struct enable_write_scalar { */ template constexpr GKO_ACC_ATTRIBUTES scalar_type - write_scalar_direct(scalar_type value, Indices &&... 
indices) const + write_scalar_direct(scalar_type value, Indices&&... indices) const { - scalar_type *GKO_ACC_RESTRICT rest_scalar = self()->scalar_; + scalar_type* GKO_ACC_RESTRICT rest_scalar = self()->scalar_; return rest_scalar[self()->compute_direct_scalar_index( std::forward(indices)...)] = value; } private: - constexpr GKO_ACC_ATTRIBUTES const Accessor *self() const + constexpr GKO_ACC_ATTRIBUTES const Accessor* self() const { - return static_cast(this); + return static_cast(this); } }; @@ -215,8 +215,8 @@ class scaled_reduced_row_major * @param scalar_stride stride array used for memory accesses to scalar */ constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major( - dim_type size, storage_type *storage, - storage_stride_type storage_stride, scalar_type *scalar, + dim_type size, storage_type* storage, + storage_stride_type storage_stride, scalar_type* scalar, scalar_stride_type scalar_stride) : size_(size), storage_{storage}, @@ -237,8 +237,8 @@ class scaled_reduced_row_major * values. */ constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major( - dim_type size, storage_type *storage, storage_stride_type stride, - scalar_type *scalar) + dim_type size, storage_type* storage, storage_stride_type stride, + scalar_type* scalar) : scaled_reduced_row_major{ size, storage, stride, scalar, helper::compute_default_masked_row_major_stride_array< @@ -256,8 +256,8 @@ class scaled_reduced_row_major * values. */ constexpr GKO_ACC_ATTRIBUTES scaled_reduced_row_major(dim_type size, - storage_type *storage, - scalar_type *scalar) + storage_type* storage, + scalar_type* scalar) : scaled_reduced_row_major{ size, storage, helper::compute_default_row_major_stride_array< @@ -295,9 +295,9 @@ class scaled_reduced_row_major */ template constexpr GKO_ACC_ATTRIBUTES scalar_type - read_scalar_masked(Indices &&... indices) const + read_scalar_masked(Indices&&... 
indices) const { - const arithmetic_type *GKO_ACC_RESTRICT rest_scalar = scalar_; + const arithmetic_type* GKO_ACC_RESTRICT rest_scalar = scalar_; return rest_scalar[compute_mask_scalar_index( std::forward(indices)...)]; } @@ -313,9 +313,9 @@ class scaled_reduced_row_major */ template constexpr GKO_ACC_ATTRIBUTES scalar_type - read_scalar_direct(Indices &&... indices) const + read_scalar_direct(Indices&&... indices) const { - const arithmetic_type *GKO_ACC_RESTRICT rest_scalar = scalar_; + const arithmetic_type* GKO_ACC_RESTRICT rest_scalar = scalar_; return rest_scalar[compute_direct_scalar_index( std::forward(indices)...)]; } @@ -389,7 +389,7 @@ class scaled_reduced_row_major * @returns a const reference to the storage stride array of size * dimensionality - 1 */ - constexpr GKO_ACC_ATTRIBUTES const storage_stride_type &get_storage_stride() + constexpr GKO_ACC_ATTRIBUTES const storage_stride_type& get_storage_stride() const { return storage_stride_; @@ -400,7 +400,7 @@ class scaled_reduced_row_major * * @returns a const reference to the scalar stride array */ - constexpr GKO_ACC_ATTRIBUTES const scalar_stride_type &get_scalar_stride() + constexpr GKO_ACC_ATTRIBUTES const scalar_stride_type& get_scalar_stride() const { return scalar_stride_; @@ -411,7 +411,7 @@ class scaled_reduced_row_major * * @returns the pointer to the storage data */ - constexpr GKO_ACC_ATTRIBUTES storage_type *get_stored_data() const + constexpr GKO_ACC_ATTRIBUTES storage_type* get_stored_data() const { return storage_; } @@ -421,7 +421,7 @@ class scaled_reduced_row_major * * @returns a const pointer to the storage data */ - constexpr GKO_ACC_ATTRIBUTES const storage_type *get_const_storage() const + constexpr GKO_ACC_ATTRIBUTES const storage_type* get_const_storage() const { return storage_; } @@ -431,7 +431,7 @@ class scaled_reduced_row_major * * @returns the pointer to the scalar data */ - constexpr GKO_ACC_ATTRIBUTES scalar_type *get_scalar() const + constexpr GKO_ACC_ATTRIBUTES 
scalar_type* get_scalar() const { return scalar_; } @@ -441,7 +441,7 @@ class scaled_reduced_row_major * * @returns a const pointer to the scalar data */ - constexpr GKO_ACC_ATTRIBUTES const scalar_type *get_const_scalar() const + constexpr GKO_ACC_ATTRIBUTES const scalar_type* get_const_scalar() const { return scalar_; } @@ -449,7 +449,7 @@ class scaled_reduced_row_major protected: template constexpr GKO_ACC_ATTRIBUTES size_type - compute_index(Indices &&... indices) const + compute_index(Indices&&... indices) const { static_assert(sizeof...(Indices) == dimensionality, "Number of indices must match dimensionality!"); @@ -459,7 +459,7 @@ class scaled_reduced_row_major template constexpr GKO_ACC_ATTRIBUTES size_type - compute_mask_scalar_index(Indices &&... indices) const + compute_mask_scalar_index(Indices&&... indices) const { static_assert(sizeof...(Indices) == dimensionality, "Number of indices must match dimensionality!"); @@ -470,7 +470,7 @@ class scaled_reduced_row_major template constexpr GKO_ACC_ATTRIBUTES size_type - compute_direct_scalar_index(Indices &&... indices) const + compute_direct_scalar_index(Indices&&... 
indices) const { static_assert( sizeof...(Indices) == scalar_dim, @@ -483,9 +483,9 @@ class scaled_reduced_row_major private: const dim_type size_; - storage_type *const storage_; + storage_type* const storage_; const storage_stride_type storage_stride_; - scalar_type *const scalar_; + scalar_type* const scalar_; const scalar_stride_type scalar_stride_; }; diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index bf52992a4fa..ead98e5cf2d 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -342,7 +342,7 @@ struct dimensions { }; -gko::size_type get_optional(rapidjson::Value &obj, const char *name, +gko::size_type get_optional(rapidjson::Value& obj, const char* name, gko::size_type default_value) { if (obj.HasMember(name)) { @@ -353,7 +353,7 @@ gko::size_type get_optional(rapidjson::Value &obj, const char *name, } -dimensions parse_dims(rapidjson::Value &test_case) +dimensions parse_dims(rapidjson::Value& test_case) { dimensions result; result.n = test_case["n"].GetInt64(); @@ -426,12 +426,12 @@ std::map( }}}; -void apply_blas(const char *operation_name, std::shared_ptr exec, - rapidjson::Value &test_case, - rapidjson::MemoryPoolAllocator<> &allocator) +void apply_blas(const char* operation_name, std::shared_ptr exec, + rapidjson::Value& test_case, + rapidjson::MemoryPoolAllocator<>& allocator) { try { - auto &blas_case = test_case["blas"]; + auto& blas_case = test_case["blas"]; add_or_set_member(blas_case, operation_name, rapidjson::Value(rapidjson::kObjectType), allocator); @@ -469,7 +469,7 @@ void apply_blas(const char *operation_name, std::shared_ptr exec, // compute and write benchmark data add_or_set_member(blas_case[operation_name], "completed", true, allocator); - } catch (const std::exception &e) { + } catch (const std::exception& e) { add_or_set_member(test_case["blas"][operation_name], "completed", false, allocator); if (FLAGS_keep_errors) { @@ -484,7 +484,7 @@ void apply_blas(const char *operation_name, std::shared_ptr exec, } 
-int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { std::string header = "A benchmark for measuring performance of Ginkgo's BLAS-like " @@ -521,9 +521,9 @@ int main(int argc, char *argv[]) std::exit(1); } - auto &allocator = test_cases.GetAllocator(); + auto& allocator = test_cases.GetAllocator(); - for (auto &test_case : test_cases.GetArray()) { + for (auto& test_case : test_cases.GetArray()) { try { // set up benchmark if (!test_case.HasMember("blas")) { @@ -531,23 +531,23 @@ int main(int argc, char *argv[]) rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &blas_case = test_case["blas"]; + auto& blas_case = test_case["blas"]; if (!FLAGS_overwrite && all_of(begin(operations), end(operations), - [&blas_case](const std::string &s) { + [&blas_case](const std::string& s) { return blas_case.HasMember(s.c_str()); })) { continue; } std::clog << "Running test case: " << test_case << std::endl; - for (const auto &operation_name : operations) { + for (const auto& operation_name : operations) { apply_blas(operation_name.c_str(), exec, test_case, allocator); std::clog << "Current state:" << std::endl << test_cases << std::endl; backup_results(test_cases); } - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error setting up benchmark, what(): " << e.what() << std::endl; } diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp index 111470a6f0f..197d24ca267 100644 --- a/benchmark/conversions/conversions.cpp +++ b/benchmark/conversions/conversions.cpp @@ -58,14 +58,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// This function supposes that management of `FLAGS_overwrite` is done before // calling it -void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, - const char *conversion_name, +void convert_matrix(const gko::LinOp* matrix_from, const char* format_to, + const char* conversion_name, std::shared_ptr exec, - rapidjson::Value &test_case, - rapidjson::MemoryPoolAllocator<> &allocator) + rapidjson::Value& test_case, + rapidjson::MemoryPoolAllocator<>& allocator) { try { - auto &conversion_case = test_case["conversions"]; + auto& conversion_case = test_case["conversions"]; add_or_set_member(conversion_case, conversion_name, rapidjson::Value(rapidjson::kObjectType), allocator); @@ -95,7 +95,7 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, // compute and write benchmark data add_or_set_member(conversion_case[conversion_name], "completed", true, allocator); - } catch (const std::exception &e) { + } catch (const std::exception& e) { add_or_set_member(test_case["conversions"][conversion_name], "completed", false, allocator); if (FLAGS_keep_errors) { @@ -110,7 +110,7 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { std::string header = "A benchmark for measuring performance of Ginkgo's conversions.\n"; @@ -133,9 +133,9 @@ int main(int argc, char *argv[]) print_config_error_and_exit(); } - auto &allocator = test_cases.GetAllocator(); + auto& allocator = test_cases.GetAllocator(); - for (auto &test_case : test_cases.GetArray()) { + for (auto& test_case : test_cases.GetArray()) { std::clog << "Benchmarking conversions. 
" << std::endl; // set up benchmark validate_option_object(test_case); @@ -144,25 +144,25 @@ int main(int argc, char *argv[]) rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &conversion_case = test_case["conversions"]; + auto& conversion_case = test_case["conversions"]; std::clog << "Running test case: " << test_case << std::endl; std::ifstream mtx_fd(test_case["filename"].GetString()); gko::matrix_data data; try { data = gko::read_raw(mtx_fd); - } catch (std::exception &e) { + } catch (std::exception& e) { std::cerr << "Error setting up matrix data, what(): " << e.what() << std::endl; continue; } std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << ")" << std::endl; - for (const auto &format_from : formats) { + for (const auto& format_from : formats) { try { auto matrix_from = share(formats::matrix_factory.at(format_from)(exec, data)); - for (const auto &format_to : formats) { + for (const auto& format_to : formats) { if (format_from == format_to) { continue; } @@ -181,8 +181,8 @@ int main(int argc, char *argv[]) << test_cases << std::endl; } backup_results(test_cases); - } catch (const gko::AllocationError &e) { - for (const auto &format : formats::matrix_factory) { + } catch (const gko::AllocationError& e) { + for (const auto& format : formats::matrix_factory) { const auto format_to = std::get<0>(format); auto conversion_name = std::string(format_from) + "-" + format_to; @@ -194,7 +194,7 @@ int main(int argc, char *argv[]) << format_from << ". 
what(): " << e.what() << std::endl; backup_results(test_cases); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error when running benchmark, what(): " << e.what() << std::endl; } diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 241678118f4..bbdad5b6e84 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ b/benchmark/matrix_generator/matrix_generator.cpp @@ -85,7 +85,7 @@ std::string input_format = // clang-format on -void validate_option_object(const rapidjson::Value &value) +void validate_option_object(const rapidjson::Value& value) { if (!value.IsObject() || !value.HasMember("filename") || !value["filename"].IsString() || !value.HasMember("problem") || @@ -97,12 +97,12 @@ void validate_option_object(const rapidjson::Value &value) using generator_function = std::function( - rapidjson::Value &, std::ranlux24 &)>; + rapidjson::Value&, std::ranlux24&)>; // matrix generators -gko::matrix_data generate_block_diagonal(rapidjson::Value &config, - std::ranlux24 &engine) +gko::matrix_data generate_block_diagonal(rapidjson::Value& config, + std::ranlux24& engine) { if (!config.HasMember("num_blocks") || !config["num_blocks"].IsUint() || !config.HasMember("block_size") || !config["block_size"].IsUint()) { @@ -122,7 +122,7 @@ std::map generator{ {"block-diagonal", generate_block_diagonal}}; -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { std::string header = "A utility that generates various types of " @@ -140,7 +140,7 @@ int main(int argc, char *argv[]) print_config_error_and_exit(1); } - for (auto &config : configurations.GetArray()) { + for (auto& config : configurations.GetArray()) { try { validate_option_object(config); std::clog << "Generating matrix: " << config << std::endl; @@ -149,7 +149,7 @@ int main(int argc, char *argv[]) auto mdata = generator[type](config["problem"], engine); std::ofstream ofs(filename); 
gko::write_raw(ofs, mdata, gko::layout_type::coordinate); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error generating matrix, what(): " << e.what() << std::endl; } diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index d8a3ce8fc1b..51118fd7b27 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -52,9 +52,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // See en.wikipedia.org/wiki/Five-number_summary // Quartile computation uses Method 3 from en.wikipedia.org/wiki/Quartile -void compute_summary(const std::vector &dist, - rapidjson::Value &out, - rapidjson::MemoryPoolAllocator<> &allocator) +void compute_summary(const std::vector& dist, + rapidjson::Value& out, + rapidjson::MemoryPoolAllocator<>& allocator) { const auto q = dist.size() / 4; const auto r = dist.size() % 4; @@ -93,14 +93,14 @@ void compute_summary(const std::vector &dist, } -double compute_moment(int degree, const std::vector &dist, +double compute_moment(int degree, const std::vector& dist, double center = 0.0, double normalization = 1.0) { if (normalization == 0.0) { return 0.0; } double moment = 0.0; - for (const auto &x : dist) { + for (const auto& x : dist) { moment += std::pow(static_cast(x) - center, degree); } return moment / static_cast(dist.size()) / @@ -109,9 +109,9 @@ double compute_moment(int degree, const std::vector &dist, // See en.wikipedia.org/wiki/Moment_(mathematics) -void compute_moments(const std::vector &dist, - rapidjson::Value &out, - rapidjson::MemoryPoolAllocator<> &allocator) +void compute_moments(const std::vector& dist, + rapidjson::Value& out, + rapidjson::MemoryPoolAllocator<>& allocator) { const auto mean = compute_moment(1, dist); add_or_set_member(out, "mean", mean, allocator); @@ -130,9 +130,9 @@ void compute_moments(const std::vector &dist, template -void 
compute_distribution_properties(const std::vector &dist, - rapidjson::Value &out, - Allocator &allocator) +void compute_distribution_properties(const std::vector& dist, + rapidjson::Value& out, + Allocator& allocator) { compute_summary(dist, out, allocator); compute_moments(dist, out, allocator); @@ -140,12 +140,12 @@ void compute_distribution_properties(const std::vector &dist, template -void extract_matrix_statistics(gko::matrix_data &data, - rapidjson::Value &problem, Allocator &allocator) +void extract_matrix_statistics(gko::matrix_data& data, + rapidjson::Value& problem, Allocator& allocator) { std::vector row_dist(data.size[0]); std::vector col_dist(data.size[1]); - for (const auto &v : data.nonzeros) { + for (const auto& v : data.nonzeros) { ++row_dist[v.row]; ++col_dist[v.column]; } @@ -168,7 +168,7 @@ void extract_matrix_statistics(gko::matrix_data &data, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { std::string header = "A utility that collects additional statistical properties of the " @@ -187,9 +187,9 @@ int main(int argc, char *argv[]) print_config_error_and_exit(); } - auto &allocator = test_cases.GetAllocator(); + auto& allocator = test_cases.GetAllocator(); - for (auto &test_case : test_cases.GetArray()) { + for (auto& test_case : test_cases.GetArray()) { try { // set up benchmark validate_option_object(test_case); @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &problem = test_case["problem"]; + auto& problem = test_case["problem"]; std::clog << "Running test case: " << test_case << std::endl; @@ -212,7 +212,7 @@ int main(int argc, char *argv[]) extract_matrix_statistics(matrix, test_case["problem"], allocator); backup_results(test_cases); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error extracting statistics, what(): " << e.what() << std::endl; } diff --git a/benchmark/preconditioner/preconditioner.cpp 
b/benchmark/preconditioner/preconditioner.cpp index 2225af0ee82..7832a560f65 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -57,7 +57,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // preconditioner generation and application -std::string encode_parameters(const char *precond_name) +std::string encode_parameters(const char* precond_name) { static std::map encoder{ {"jacobi", @@ -129,15 +129,15 @@ std::string encode_parameters(const char *precond_name) } -void run_preconditioner(const char *precond_name, +void run_preconditioner(const char* precond_name, std::shared_ptr exec, std::shared_ptr system_matrix, - const vec *b, const vec *x, - rapidjson::Value &test_case, - rapidjson::MemoryPoolAllocator<> &allocator) + const vec* b, const vec* x, + rapidjson::Value& test_case, + rapidjson::MemoryPoolAllocator<>& allocator) { try { - auto &precond_object = test_case["preconditioner"]; + auto& precond_object = test_case["preconditioner"]; auto encoded_name = encode_parameters(precond_name); if (!FLAGS_overwrite && @@ -147,7 +147,7 @@ void run_preconditioner(const char *precond_name, add_or_set_member(precond_object, encoded_name.c_str(), rapidjson::Value(rapidjson::kObjectType), allocator); - auto &this_precond_data = precond_object[encoded_name.c_str()]; + auto& this_precond_data = precond_object[encoded_name.c_str()]; add_or_set_member(this_precond_data, "generate", rapidjson::Value(rapidjson::kObjectType), allocator); @@ -223,7 +223,7 @@ void run_preconditioner(const char *precond_name, } add_or_set_member(this_precond_data, "completed", true, allocator); - } catch (const std::exception &e) { + } catch (const std::exception& e) { auto encoded_name = encode_parameters(precond_name); add_or_set_member(test_case["preconditioner"], encoded_name.c_str(), rapidjson::Value(rapidjson::kObjectType), allocator); @@ -241,7 +241,7 @@ void run_preconditioner(const char *precond_name, } -int 
main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use csr as the default format FLAGS_formats = "csr"; @@ -257,7 +257,7 @@ int main(int argc, char *argv[]) print_general_information(extra_information); auto exec = get_executor(); - auto &engine = get_engine(); + auto& engine = get_engine(); auto preconditioners = split(FLAGS_preconditioners, ','); @@ -274,9 +274,9 @@ int main(int argc, char *argv[]) print_config_error_and_exit(); } - auto &allocator = test_cases.GetAllocator(); + auto& allocator = test_cases.GetAllocator(); - for (auto &test_case : test_cases.GetArray()) { + for (auto& test_case : test_cases.GetArray()) { try { // set up benchmark validate_option_object(test_case); @@ -285,10 +285,10 @@ int main(int argc, char *argv[]) rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &precond_object = test_case["preconditioner"]; + auto& precond_object = test_case["preconditioner"]; if (!FLAGS_overwrite && all_of(begin(preconditioners), end(preconditioners), - [&precond_object](const std::string &s) { + [&precond_object](const std::string& s) { return precond_object.HasMember(s.c_str()); })) { continue; @@ -307,14 +307,14 @@ int main(int argc, char *argv[]) std::clog << "Matrix is of size (" << system_matrix->get_size()[0] << ", " << system_matrix->get_size()[1] << ")" << std::endl; - for (const auto &precond_name : preconditioners) { + for (const auto& precond_name : preconditioners) { run_preconditioner(precond_name.c_str(), exec, system_matrix, lend(b), lend(x), test_case, allocator); std::clog << "Current state:" << std::endl << test_cases << std::endl; backup_results(test_cases); } - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error setting up preconditioner, what(): " << e.what() << std::endl; } diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 77d72c94bdf..3942041ade7 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -166,7 
+166,7 @@ std::unique_ptr> generate_rhs( template std::unique_ptr> generate_initial_guess( std::shared_ptr exec, - std::shared_ptr system_matrix, const vec *rhs, + std::shared_ptr system_matrix, const vec* rhs, Engine engine) { gko::dim<2> vec_size{system_matrix->get_size()[1], FLAGS_nrhs}; @@ -183,7 +183,7 @@ std::unique_ptr> generate_initial_guess( } -void validate_option_object(const rapidjson::Value &value) +void validate_option_object(const rapidjson::Value& value) { if (!value.IsObject() || !value.HasMember("optimal") || !value["optimal"].HasMember("spmv") || @@ -221,7 +221,7 @@ std::shared_ptr create_criterion( template std::unique_ptr add_criteria_precond_finalize( - SolverIntermediate inter, const std::shared_ptr &exec, + SolverIntermediate inter, const std::shared_ptr& exec, std::shared_ptr precond) { return inter.with_criteria(create_criterion(exec)) @@ -232,7 +232,7 @@ std::unique_ptr add_criteria_precond_finalize( template std::unique_ptr add_criteria_precond_finalize( - const std::shared_ptr &exec, + const std::shared_ptr& exec, std::shared_ptr precond) { return add_criteria_precond_finalize(Solver::build(), exec, precond); @@ -240,9 +240,9 @@ std::unique_ptr add_criteria_precond_finalize( std::unique_ptr generate_solver( - const std::shared_ptr &exec, + const std::shared_ptr& exec, std::shared_ptr precond, - const std::string &description) + const std::string& description) { std::string cb_gmres_prefix("cb_gmres_"); if (description.find(cb_gmres_prefix) == 0) { @@ -314,12 +314,12 @@ std::unique_ptr generate_solver( } -void write_precond_info(const gko::LinOp *precond, - rapidjson::Value &precond_info, - rapidjson::MemoryPoolAllocator<> &allocator) +void write_precond_info(const gko::LinOp* precond, + rapidjson::Value& precond_info, + rapidjson::MemoryPoolAllocator<>& allocator) { if (const auto jacobi = - dynamic_cast *>(precond)) { + dynamic_cast*>(precond)) { // extract block sizes const auto bdata = 
jacobi->get_parameters().block_pointers.get_const_data(); @@ -360,24 +360,24 @@ void write_precond_info(const gko::LinOp *precond, } -void solve_system(const std::string &solver_name, - const std::string &precond_name, - const char *precond_solver_name, +void solve_system(const std::string& solver_name, + const std::string& precond_name, + const char* precond_solver_name, std::shared_ptr exec, std::shared_ptr system_matrix, - const vec *b, const vec *x, - rapidjson::Value &test_case, - rapidjson::MemoryPoolAllocator<> &allocator) + const vec* b, const vec* x, + rapidjson::Value& test_case, + rapidjson::MemoryPoolAllocator<>& allocator) { try { - auto &solver_case = test_case["solver"]; + auto& solver_case = test_case["solver"]; if (!FLAGS_overwrite && solver_case.HasMember(precond_solver_name)) { return; } add_or_set_member(solver_case, precond_solver_name, rapidjson::Value(rapidjson::kObjectType), allocator); - auto &solver_json = solver_case[precond_solver_name]; + auto& solver_json = solver_case[precond_solver_name]; add_or_set_member(solver_json, "recurrent_residuals", rapidjson::Value(rapidjson::kArrayType), allocator); add_or_set_member(solver_json, "true_residuals", @@ -435,7 +435,7 @@ void solve_system(const std::string &solver_name, allocator, 1); if (auto prec = - dynamic_cast(lend(solver))) { + dynamic_cast(lend(solver))) { add_or_set_member(solver_json, "preconditioner", rapidjson::Value(rapidjson::kObjectType), allocator); @@ -506,7 +506,7 @@ void solve_system(const std::string &solver_name, // compute and write benchmark data add_or_set_member(solver_json, "completed", true, allocator); - } catch (const std::exception &e) { + } catch (const std::exception& e) { add_or_set_member(test_case["solver"][precond_solver_name], "completed", false, allocator); if (FLAGS_keep_errors) { @@ -521,7 +521,7 @@ void solve_system(const std::string &solver_name, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Set the default repetitions = 1. 
FLAGS_repetitions = "1"; @@ -554,8 +554,8 @@ int main(int argc, char *argv[]) auto solvers = split(FLAGS_solvers, ','); auto preconds = split(FLAGS_preconditioners, ','); std::vector precond_solvers; - for (const auto &s : solvers) { - for (const auto &p : preconds) { + for (const auto& s : solvers) { + for (const auto& p : preconds) { precond_solvers.push_back(s + (p == "none" ? "" : "-" + p)); } } @@ -577,9 +577,9 @@ int main(int argc, char *argv[]) } auto engine = get_engine(); - auto &allocator = test_cases.GetAllocator(); + auto& allocator = test_cases.GetAllocator(); - for (auto &test_case : test_cases.GetArray()) { + for (auto& test_case : test_cases.GetArray()) { try { // set up benchmark validate_option_object(test_case); @@ -588,10 +588,10 @@ int main(int argc, char *argv[]) rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &solver_case = test_case["solver"]; + auto& solver_case = test_case["solver"]; if (!FLAGS_overwrite && all_of(begin(precond_solvers), end(precond_solvers), - [&solver_case](const std::string &s) { + [&solver_case](const std::string& s) { return solver_case.HasMember(s.c_str()); })) { continue; @@ -626,8 +626,8 @@ int main(int argc, char *argv[]) << ", " << system_matrix->get_size()[1] << ")" << std::endl; auto precond_solver_name = begin(precond_solvers); - for (const auto &solver_name : solvers) { - for (const auto &precond_name : preconds) { + for (const auto& solver_name : solvers) { + for (const auto& precond_name : preconds) { std::clog << "\tRunning solver: " << *precond_solver_name << std::endl; solve_system(solver_name, precond_name, @@ -638,7 +638,7 @@ int main(int argc, char *argv[]) ++precond_solver_name; } } - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error setting up solver, what(): " << e.what() << std::endl; } diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index ac7562e4ed6..c50e2d711da 100644 --- a/benchmark/spmv/spmv.cpp +++ 
b/benchmark/spmv/spmv.cpp @@ -62,14 +62,14 @@ DEFINE_uint32(nrhs, 1, "The number of right hand sides"); // This function supposes that management of `FLAGS_overwrite` is done before // calling it -void apply_spmv(const char *format_name, std::shared_ptr exec, - const gko::matrix_data &data, const vec *b, - const vec *x, const vec *answer, - rapidjson::Value &test_case, - rapidjson::MemoryPoolAllocator<> &allocator) +void apply_spmv(const char* format_name, std::shared_ptr exec, + const gko::matrix_data& data, const vec* b, + const vec* x, const vec* answer, + rapidjson::Value& test_case, + rapidjson::MemoryPoolAllocator<>& allocator) { try { - auto &spmv_case = test_case["spmv"]; + auto& spmv_case = test_case["spmv"]; add_or_set_member(spmv_case, format_name, rapidjson::Value(rapidjson::kObjectType), allocator); @@ -103,12 +103,12 @@ void apply_spmv(const char *format_name, std::shared_ptr exec, // tuning run #ifdef GINKGO_BENCHMARK_ENABLE_TUNING - auto &format_case = spmv_case[format_name]; + auto& format_case = spmv_case[format_name]; if (!format_case.HasMember("tuning")) { format_case.AddMember( "tuning", rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &tuning_case = format_case["tuning"]; + auto& tuning_case = format_case["tuning"]; add_or_set_member(tuning_case, "time", rapidjson::Value(rapidjson::kArrayType), allocator); add_or_set_member(tuning_case, "values", @@ -151,7 +151,7 @@ void apply_spmv(const char *format_name, std::shared_ptr exec, // compute and write benchmark data add_or_set_member(spmv_case[format_name], "completed", true, allocator); - } catch (const std::exception &e) { + } catch (const std::exception& e) { add_or_set_member(test_case["spmv"][format_name], "completed", false, allocator); if (FLAGS_keep_errors) { @@ -166,7 +166,7 @@ void apply_spmv(const char *format_name, std::shared_ptr exec, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { std::string header = "A benchmark for measuring performance of 
Ginkgo's spmv.\n"; @@ -191,9 +191,9 @@ int main(int argc, char *argv[]) print_config_error_and_exit(); } - auto &allocator = test_cases.GetAllocator(); + auto& allocator = test_cases.GetAllocator(); - for (auto &test_case : test_cases.GetArray()) { + for (auto& test_case : test_cases.GetArray()) { try { // set up benchmark validate_option_object(test_case); @@ -202,10 +202,10 @@ int main(int argc, char *argv[]) rapidjson::Value(rapidjson::kObjectType), allocator); } - auto &spmv_case = test_case["spmv"]; + auto& spmv_case = test_case["spmv"]; if (!FLAGS_overwrite && all_of(begin(formats), end(formats), - [&spmv_case](const std::string &s) { + [&spmv_case](const std::string& s) { return spmv_case.HasMember(s.c_str()); })) { continue; @@ -239,7 +239,7 @@ int main(int argc, char *argv[]) system_matrix->apply(lend(b), lend(answer)); exec->synchronize(); } - for (const auto &format_name : formats) { + for (const auto& format_name : formats) { apply_spmv(format_name.c_str(), exec, data, lend(b), lend(x), lend(answer), test_case, allocator); std::clog << "Current state:" << std::endl @@ -260,7 +260,7 @@ int main(int argc, char *argv[]) } backup_results(test_cases); } - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error setting up matrix data, what(): " << e.what() << std::endl; } diff --git a/benchmark/utils/cuda_linops.hpp b/benchmark/utils/cuda_linops.hpp index d64ec8b3ebd..39af3087ced 100644 --- a/benchmark/utils/cuda_linops.hpp +++ b/benchmark/utils/cuda_linops.hpp @@ -67,14 +67,14 @@ class CuspBase : public gko::LinOp { } protected: - void apply_impl(const gko::LinOp *, const gko::LinOp *, const gko::LinOp *, - gko::LinOp *) const override + void apply_impl(const gko::LinOp*, const gko::LinOp*, const gko::LinOp*, + gko::LinOp*) const override { GKO_NOT_IMPLEMENTED; } CuspBase(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::LinOp(exec, size) { gpu_exec_ = 
std::dynamic_pointer_cast(exec); @@ -86,9 +86,9 @@ class CuspBase : public gko::LinOp { ~CuspBase() = default; - CuspBase(const CuspBase &other) = delete; + CuspBase(const CuspBase& other) = delete; - CuspBase &operator=(const CuspBase &other) + CuspBase& operator=(const CuspBase& other) { if (this != &other) { gko::LinOp::operator=(other); @@ -113,7 +113,7 @@ class CuspBase : public gko::LinOp { private: std::shared_ptr gpu_exec_; template - using handle_manager = std::unique_ptr>; + using handle_manager = std::unique_ptr>; handle_manager descr_; }; @@ -134,7 +134,7 @@ class CuspCsrmp using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { csr_->read(data); this->set_size(gko::dim<2>{csr_->get_size()}); @@ -146,7 +146,7 @@ class CuspCsrmp } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -165,7 +165,7 @@ class CuspCsrmp } CuspCsrmp(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), @@ -194,7 +194,7 @@ class CuspCsr using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { csr_->read(data); this->set_size(gko::dim<2>{csr_->get_size()}); @@ -206,7 +206,7 @@ class CuspCsr } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -225,7 +225,7 @@ class CuspCsr } CuspCsr(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( 
csr::create(exec, std::make_shared()))), @@ -254,7 +254,7 @@ class CuspCsrmm using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { csr_->read(data); this->set_size(gko::dim<2>{csr_->get_size()}); @@ -266,7 +266,7 @@ class CuspCsrmm } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -286,7 +286,7 @@ class CuspCsrmm } CuspCsrmm(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), @@ -318,7 +318,7 @@ class CuspCsrEx using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { csr_->read(data); this->set_size(gko::dim<2>{csr_->get_size()}); @@ -329,12 +329,12 @@ class CuspCsrEx return csr_->get_num_stored_elements(); } - CuspCsrEx(const CuspCsrEx &other) = delete; + CuspCsrEx(const CuspCsrEx& other) = delete; - CuspCsrEx &operator=(const CuspCsrEx &other) = default; + CuspCsrEx& operator=(const CuspCsrEx& other) = default; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -370,7 +370,7 @@ class CuspCsrEx CuspCsrEx(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), @@ -410,7 +410,7 @@ class CuspHybrid using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { auto t_csr = 
csr::create(this->get_executor(), std::make_shared()); @@ -432,18 +432,18 @@ class CuspHybrid try { gko::cuda::device_guard g{id}; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error when unallocating CuspHybrid hyb_ matrix: " << e.what() << std::endl; } } - CuspHybrid(const CuspHybrid &other) = delete; + CuspHybrid(const CuspHybrid& other) = delete; - CuspHybrid &operator=(const CuspHybrid &other) = default; + CuspHybrid& operator=(const CuspHybrid& other) = default; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -459,7 +459,7 @@ class CuspHybrid } CuspHybrid(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) { @@ -488,8 +488,8 @@ class CuspHybrid template void cusp_generic_spmv(std::shared_ptr gpu_exec, const cusparseSpMatDescr_t mat, - const gko::Array &scalars, - const gko::LinOp *b, gko::LinOp *x, + const gko::Array& scalars, + const gko::LinOp* b, gko::LinOp* x, cusparseOperation_t trans, cusparseSpMVAlg_t alg) { cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type(); @@ -507,7 +507,7 @@ void cusp_generic_spmv(std::shared_ptr gpu_exec, // cusparseCreateDnVec only allows non-const pointer GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateDnVec( &vecb, dense_b->get_num_stored_elements(), - as_culibs_type(const_cast(db)), cu_value)); + as_culibs_type(const_cast(db)), cu_value)); gko::size_type buffer_size = 0; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV_bufferSize( @@ -542,7 +542,7 @@ class CuspGenericCsr gko::kernels::cuda::cusparse_index_type(); cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type(); - void read(const mat_data &data) override + void read(const 
mat_data& data) override { using gko::kernels::cuda::as_culibs_type; csr_->read(data); @@ -567,25 +567,25 @@ class CuspGenericCsr try { gko::cuda::device_guard g{id}; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_)); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error when unallocating CuspGenericCsr mat_ matrix: " << e.what() << std::endl; } } - CuspGenericCsr(const CuspGenericCsr &other) = delete; + CuspGenericCsr(const CuspGenericCsr& other) = delete; - CuspGenericCsr &operator=(const CuspGenericCsr &other) = default; + CuspGenericCsr& operator=(const CuspGenericCsr& other) = default; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { cusp_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_, Alg); } CuspGenericCsr(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), @@ -618,7 +618,7 @@ class CuspGenericCoo gko::kernels::cuda::cusparse_index_type(); cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type(); - void read(const mat_data &data) override + void read(const mat_data& data) override { using gko::kernels::cuda::as_culibs_type; coo_->read(data); @@ -643,25 +643,25 @@ class CuspGenericCoo try { gko::cuda::device_guard g{id}; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_)); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error when unallocating CuspGenericCoo mat_ matrix: " << e.what() << std::endl; } } - CuspGenericCoo(const CuspGenericCoo &other) = delete; + CuspGenericCoo(const CuspGenericCoo& other) = delete; - CuspGenericCoo &operator=(const CuspGenericCoo &other) = default; + CuspGenericCoo& operator=(const CuspGenericCoo& other) = default; protected: - void apply_impl(const gko::LinOp *b, 
gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { cusp_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_, CUSPARSE_MV_ALG_DEFAULT); } CuspGenericCoo(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), coo_(std::move(coo::create(exec))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index 8ae4ad45e7a..0ab3f98b93b 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -179,7 +179,7 @@ using csr = gko::matrix::Csr; template std::unique_ptr read_matrix_from_data( std::shared_ptr exec, - const gko::matrix_data &data) + const gko::matrix_data& data) { auto mat = MatrixType::create(std::move(exec)); mat->read(data); @@ -197,9 +197,9 @@ template std::shared_ptr create_gpu_strategy( std::shared_ptr exec) { - if (auto cuda = dynamic_cast(exec.get())) { + if (auto cuda = dynamic_cast(exec.get())) { return std::make_shared(cuda->shared_from_this()); - } else if (auto hip = dynamic_cast(exec.get())) { + } else if (auto hip = dynamic_cast(exec.get())) { return std::make_shared(hip->shared_from_this()); } else { return std::make_shared(); @@ -213,7 +213,7 @@ std::shared_ptr create_gpu_strategy( * * @throws gko::Error if the imbalance limit is exceeded */ -void check_ell_admissibility(const gko::matrix_data &data) +void check_ell_admissibility(const gko::matrix_data& data) { if (data.size[0] == 0 || FLAGS_ell_imbalance_limit < 0) { return; @@ -239,7 +239,7 @@ void check_ell_admissibility(const gko::matrix_data &data) */ #define READ_MATRIX(MATRIX_TYPE, ...) 
\ [](std::shared_ptr exec, \ - const gko::matrix_data &data) \ + const gko::matrix_data& data) \ -> std::unique_ptr { \ auto mat = MATRIX_TYPE::create(std::move(exec), __VA_ARGS__); \ mat->read(data); \ diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index d4f6a6853a8..cf7d2e99af0 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -127,8 +127,8 @@ DEFINE_double(repetition_growth_factor, 1.5, * @param header a header which describes the benchmark * @param format the format of the benchmark input data */ -void initialize_argument_parsing(int *argc, char **argv[], std::string &header, - std::string &format) +void initialize_argument_parsing(int* argc, char** argv[], std::string& header, + std::string& format) { std::ostringstream doc; doc << header << "Usage: " << (*argv)[0] << " [options]\n" @@ -157,7 +157,7 @@ void initialize_argument_parsing(int *argc, char **argv[], std::string &header, * * @param extra describes benchmark specific extra parameters to output */ -void print_general_information(std::string &extra) +void print_general_information(std::string& extra) { std::clog << gko::version_info::get() << std::endl << "Running on " << FLAGS_executor << "(" << FLAGS_device_id @@ -188,7 +188,7 @@ void print_general_information(std::string &extra) */ template std::unique_ptr read_matrix( - std::shared_ptr exec, const rapidjson::Value &options) + std::shared_ptr exec, const rapidjson::Value& options) { return gko::read(std::ifstream(options["filename"].GetString()), std::move(exec)); @@ -196,7 +196,7 @@ std::unique_ptr read_matrix( // Returns a random number engine -std::ranlux24 &get_engine() +std::ranlux24& get_engine() { static std::ranlux24 engine(FLAGS_seed); return engine; @@ -204,7 +204,7 @@ std::ranlux24 &get_engine() // helper for writing out rapidjson Values -std::ostream &operator<<(std::ostream &os, const rapidjson::Value &value) +std::ostream& operator<<(std::ostream& os, const rapidjson::Value& value) { 
rapidjson::OStreamWrapper jos(os); rapidjson::PrettyWriter, @@ -220,8 +220,8 @@ std::ostream &operator<<(std::ostream &os, const rapidjson::Value &value) template std::enable_if_t< !std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value &object, NameType &&name, T &&value, - Allocator &&allocator) +add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, + Allocator&& allocator) { if (object.HasMember(name)) { object[name] = std::forward(value); @@ -241,8 +241,8 @@ add_or_set_member(rapidjson::Value &object, NameType &&name, T &&value, template std::enable_if_t< std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value &object, NameType &&name, T &&value, - Allocator &&allocator) +add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, + Allocator&& allocator) { if (object.HasMember(name)) { object[name] = @@ -257,7 +257,7 @@ add_or_set_member(rapidjson::Value &object, NameType &&name, T &&value, // helper for splitting a delimiter-separated list into vector of strings -std::vector split(const std::string &s, char delimiter = ',') +std::vector split(const std::string& s, char delimiter = ',') { std::istringstream iss(s); std::vector tokens; @@ -270,7 +270,7 @@ std::vector split(const std::string &s, char delimiter = ',') // backup generation -void backup_results(rapidjson::Document &results) +void backup_results(rapidjson::Document& results) { static int next = 0; static auto filenames = []() -> std::array { @@ -376,7 +376,7 @@ std::unique_ptr> create_matrix( template std::unique_ptr> create_matrix( std::shared_ptr exec, gko::dim<2> size, - RandomEngine &engine) + RandomEngine& engine) { auto res = vec::create(exec); res->read(gko::matrix_data( @@ -403,7 +403,7 @@ std::unique_ptr> create_vector( template std::unique_ptr> create_vector( std::shared_ptr exec, gko::size_type size, - RandomEngine &engine) + RandomEngine& engine) { return create_matrix(exec, gko::dim<2>{size, 1}, 
engine); } @@ -411,14 +411,14 @@ std::unique_ptr> create_vector( // utilities for computing norms and residuals template -ValueType get_norm(const vec *norm) +ValueType get_norm(const vec* norm) { return clone(norm->get_executor()->get_master(), norm)->at(0, 0); } template -gko::remove_complex compute_norm2(const vec *b) +gko::remove_complex compute_norm2(const vec* b) { auto exec = b->get_executor(); auto b_norm = @@ -430,8 +430,8 @@ gko::remove_complex compute_norm2(const vec *b) template gko::remove_complex compute_residual_norm( - const gko::LinOp *system_matrix, const vec *b, - const vec *x) + const gko::LinOp* system_matrix, const vec* b, + const vec* x) { auto exec = system_matrix->get_executor(); auto one = gko::initialize>({1.0}, exec); @@ -444,7 +444,7 @@ gko::remove_complex compute_residual_norm( template gko::remove_complex compute_max_relative_norm2( - vec *result, const vec *answer) + vec* result, const vec* answer) { using rc_vtype = gko::remove_complex; auto exec = answer->get_executor(); @@ -526,7 +526,7 @@ class IterationControl { * * @param timer the timer that is to be used for the timings */ - explicit IterationControl(const std::shared_ptr &timer) + explicit IterationControl(const std::shared_ptr& timer) { status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, FLAGS_warmup, 0., 0}; @@ -541,8 +541,8 @@ class IterationControl { } IterationControl() = default; - IterationControl(const IterationControl &) = default; - IterationControl(IterationControl &&) = default; + IterationControl(const IterationControl&) = default; + IterationControl(IterationControl&&) = default; /** * Creates iterable `run_control` object for the warmup run. 
@@ -685,7 +685,7 @@ class IterationControl { * * @return true if benchmark is not finished, else false */ - bool operator!=(const iterator &) + bool operator!=(const iterator&) { const bool is_finished = cur_info->is_finished(); if (!is_finished && stopped) { @@ -698,7 +698,7 @@ class IterationControl { return !is_finished; } - status *cur_info; + status* cur_info; IndexType next_timing = 1; //!< next iteration to stop timing bool stopped = true; }; @@ -708,7 +708,7 @@ class IterationControl { // not used, could potentially used in c++17 as a sentinel iterator end() const { return iterator{}; } - status *info; + status* info; }; status status_warmup_; diff --git a/benchmark/utils/hip_linops.hip.hpp b/benchmark/utils/hip_linops.hip.hpp index f9b2066aab5..12d9713c173 100644 --- a/benchmark/utils/hip_linops.hip.hpp +++ b/benchmark/utils/hip_linops.hip.hpp @@ -58,17 +58,17 @@ class HipspBase : public gko::LinOp { public: hipsparseMatDescr_t get_descr() const { return this->descr_.get(); } - const gko::HipExecutor *get_gpu_exec() const { return gpu_exec_.get(); } + const gko::HipExecutor* get_gpu_exec() const { return gpu_exec_.get(); } protected: - void apply_impl(const gko::LinOp *, const gko::LinOp *, const gko::LinOp *, - gko::LinOp *) const override + void apply_impl(const gko::LinOp*, const gko::LinOp*, const gko::LinOp*, + gko::LinOp*) const override { GKO_NOT_IMPLEMENTED; } HipspBase(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::LinOp(exec, size) { gpu_exec_ = std::dynamic_pointer_cast(exec); @@ -80,9 +80,9 @@ class HipspBase : public gko::LinOp { ~HipspBase() = default; - HipspBase(const HipspBase &other) = delete; + HipspBase(const HipspBase& other) = delete; - HipspBase &operator=(const HipspBase &other) + HipspBase& operator=(const HipspBase& other) { if (this != &other) { gko::LinOp::operator=(other); @@ -97,9 +97,9 @@ class HipspBase : public gko::LinOp { const auto id = 
this->gpu_exec_->get_device_id(); gko::hip::device_guard g{id}; this->descr_ = handle_manager( - reinterpret_cast( + reinterpret_cast( gko::kernels::hip::hipsparse::create_mat_descr()), - [id](hipsparseMatDescr *descr) { + [id](hipsparseMatDescr* descr) { gko::hip::device_guard g{id}; gko::kernels::hip::hipsparse::destroy(descr); }); @@ -108,7 +108,7 @@ class HipspBase : public gko::LinOp { private: std::shared_ptr gpu_exec_; template - using handle_manager = std::unique_ptr>; + using handle_manager = std::unique_ptr>; handle_manager descr_; }; @@ -126,7 +126,7 @@ class HipspCsr using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { csr_->read(data); this->set_size(gko::dim<2>{csr_->get_size()}); @@ -138,7 +138,7 @@ class HipspCsr } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -157,7 +157,7 @@ class HipspCsr } HipspCsr(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), @@ -186,7 +186,7 @@ class HipspCsrmm using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { csr_->read(data); this->set_size(gko::dim<2>{csr_->get_size()}); @@ -198,7 +198,7 @@ class HipspCsrmm } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -218,7 +218,7 @@ class HipspCsrmm } HipspCsrmm(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), csr_(std::move( 
csr::create(exec, std::make_shared()))), @@ -251,7 +251,7 @@ class HipspHybrid using csr = gko::matrix::Csr; using mat_data = gko::matrix_data; - void read(const mat_data &data) override + void read(const mat_data& data) override { auto t_csr = csr::create(this->get_executor(), std::make_shared()); @@ -273,18 +273,18 @@ class HipspHybrid try { gko::hip::device_guard g{id}; GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseDestroyHybMat(hyb_)); - } catch (const std::exception &e) { + } catch (const std::exception& e) { std::cerr << "Error when unallocating HipspHybrid hyb_ matrix: " << e.what() << std::endl; } } - HipspHybrid(const HipspHybrid &other) = delete; + HipspHybrid(const HipspHybrid& other) = delete; - HipspHybrid &operator=(const HipspHybrid &other) = default; + HipspHybrid& operator=(const HipspHybrid& other) = default; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { auto dense_b = gko::as>(b); auto dense_x = gko::as>(x); @@ -300,7 +300,7 @@ class HipspHybrid } HipspHybrid(std::shared_ptr exec, - const gko::dim<2> &size = gko::dim<2>{}) + const gko::dim<2>& size = gko::dim<2>{}) : gko::EnableLinOp(exec, size), trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE) { diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index 983cd2951fb..f13a4acc84a 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -49,65 +49,65 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// A logger that accumulates the time of all operations struct OperationLogger : gko::log::Logger { - void on_allocation_started(const gko::Executor *exec, - const gko::size_type &) const override + void on_allocation_started(const gko::Executor* exec, + const gko::size_type&) const override { this->start_operation(exec, "allocate"); } - void on_allocation_completed(const gko::Executor *exec, - const gko::size_type &, - const gko::uintptr &) const override + void on_allocation_completed(const gko::Executor* exec, + const gko::size_type&, + const gko::uintptr&) const override { this->end_operation(exec, "allocate"); } - void on_free_started(const gko::Executor *exec, - const gko::uintptr &) const override + void on_free_started(const gko::Executor* exec, + const gko::uintptr&) const override { this->start_operation(exec, "free"); } - void on_free_completed(const gko::Executor *exec, - const gko::uintptr &) const override + void on_free_completed(const gko::Executor* exec, + const gko::uintptr&) const override { this->end_operation(exec, "free"); } - void on_copy_started(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, - const gko::size_type &) const override + void on_copy_started(const gko::Executor* from, const gko::Executor* to, + const gko::uintptr&, const gko::uintptr&, + const gko::size_type&) const override { from->synchronize(); this->start_operation(to, "copy"); } - void on_copy_completed(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, - const gko::size_type &) const override + void on_copy_completed(const gko::Executor* from, const gko::Executor* to, + const gko::uintptr&, const gko::uintptr&, + const gko::size_type&) const override { from->synchronize(); this->end_operation(to, "copy"); } - void on_operation_launched(const gko::Executor *exec, - const gko::Operation *op) const override + void on_operation_launched(const gko::Executor* exec, + const 
gko::Operation* op) const override { this->start_operation(exec, op->get_name()); } - void on_operation_completed(const gko::Executor *exec, - const gko::Operation *op) const override + void on_operation_completed(const gko::Executor* exec, + const gko::Operation* op) const override { this->end_operation(exec, op->get_name()); } - void write_data(rapidjson::Value &object, - rapidjson::MemoryPoolAllocator<> &alloc, + void write_data(rapidjson::Value& object, + rapidjson::MemoryPoolAllocator<>& alloc, gko::uint32 repetitions) { const std::lock_guard lock(mutex); - for (const auto &entry : total) { + for (const auto& entry : total) { add_or_set_member( object, entry.first.c_str(), std::chrono::duration(entry.second).count() / @@ -121,8 +121,8 @@ struct OperationLogger : gko::log::Logger { {} private: - void start_operation(const gko::Executor *exec, - const std::string &name) const + void start_operation(const gko::Executor* exec, + const std::string& name) const { exec->synchronize(); const std::lock_guard lock(mutex); @@ -133,7 +133,7 @@ struct OperationLogger : gko::log::Logger { start[nested_name] = std::chrono::steady_clock::now(); } - void end_operation(const gko::Executor *exec, const std::string &name) const + void end_operation(const gko::Executor* exec, const std::string& name) const { exec->synchronize(); const std::lock_guard lock(mutex); @@ -162,27 +162,27 @@ struct OperationLogger : gko::log::Logger { struct StorageLogger : gko::log::Logger { - void on_allocation_completed(const gko::Executor *, - const gko::size_type &num_bytes, - const gko::uintptr &location) const override + void on_allocation_completed(const gko::Executor*, + const gko::size_type& num_bytes, + const gko::uintptr& location) const override { const std::lock_guard lock(mutex); storage[location] = num_bytes; } - void on_free_completed(const gko::Executor *, - const gko::uintptr &location) const override + void on_free_completed(const gko::Executor*, + const gko::uintptr& location) const 
override { const std::lock_guard lock(mutex); storage[location] = 0; } - void write_data(rapidjson::Value &output, - rapidjson::MemoryPoolAllocator<> &allocator) + void write_data(rapidjson::Value& output, + rapidjson::MemoryPoolAllocator<>& allocator) { const std::lock_guard lock(mutex); gko::size_type total{}; - for (const auto &e : storage) { + for (const auto& e : storage) { total += e.second; } add_or_set_member(output, "storage", total, allocator); @@ -204,20 +204,20 @@ struct ResidualLogger : gko::log::Logger { using rc_vtype = gko::remove_complex; // TODO2.0: Remove when deprecating simple overload - void on_iteration_complete(const gko::LinOp *solver, - const gko::size_type &it, - const gko::LinOp *residual, - const gko::LinOp *solution, - const gko::LinOp *residual_norm) const override + void on_iteration_complete(const gko::LinOp* solver, + const gko::size_type& it, + const gko::LinOp* residual, + const gko::LinOp* solution, + const gko::LinOp* residual_norm) const override { on_iteration_complete(solver, it, residual, solution, residual_norm, nullptr); } void on_iteration_complete( - const gko::LinOp *, const gko::size_type &, const gko::LinOp *residual, - const gko::LinOp *solution, const gko::LinOp *residual_norm, - const gko::LinOp *implicit_sq_residual_norm) const override + const gko::LinOp*, const gko::size_type&, const gko::LinOp* residual, + const gko::LinOp* solution, const gko::LinOp* residual_norm, + const gko::LinOp* implicit_sq_residual_norm) const override { timestamps.PushBack(std::chrono::duration( std::chrono::steady_clock::now() - start) @@ -250,12 +250,12 @@ struct ResidualLogger : gko::log::Logger { } ResidualLogger(std::shared_ptr exec, - const gko::LinOp *matrix, const vec *b, - rapidjson::Value &rec_res_norms, - rapidjson::Value &true_res_norms, - rapidjson::Value &implicit_res_norms, - rapidjson::Value &timestamps, - rapidjson::MemoryPoolAllocator<> &alloc) + const gko::LinOp* matrix, const vec* b, + rapidjson::Value& rec_res_norms, + 
rapidjson::Value& true_res_norms, + rapidjson::Value& implicit_res_norms, + rapidjson::Value& timestamps, + rapidjson::MemoryPoolAllocator<>& alloc) : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), matrix{matrix}, b{b}, @@ -271,24 +271,24 @@ struct ResidualLogger : gko::log::Logger { bool has_implicit_res_norms() const { return has_implicit_res_norm; } private: - const gko::LinOp *matrix; - const vec *b; + const gko::LinOp* matrix; + const vec* b; std::chrono::steady_clock::time_point start; - rapidjson::Value &rec_res_norms; - rapidjson::Value &true_res_norms; + rapidjson::Value& rec_res_norms; + rapidjson::Value& true_res_norms; mutable bool has_implicit_res_norm; - rapidjson::Value &implicit_res_norms; - rapidjson::Value &timestamps; - rapidjson::MemoryPoolAllocator<> &alloc; + rapidjson::Value& implicit_res_norms; + rapidjson::Value& timestamps; + rapidjson::MemoryPoolAllocator<>& alloc; }; // Logs the number of iteration executed struct IterationLogger : gko::log::Logger { - void on_iteration_complete(const gko::LinOp *, - const gko::size_type &num_iterations, - const gko::LinOp *, const gko::LinOp *, - const gko::LinOp *) const override + void on_iteration_complete(const gko::LinOp*, + const gko::size_type& num_iterations, + const gko::LinOp*, const gko::LinOp*, + const gko::LinOp*) const override { this->num_iters = num_iterations; } @@ -297,8 +297,8 @@ struct IterationLogger : gko::log::Logger { : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask) {} - void write_data(rapidjson::Value &output, - rapidjson::MemoryPoolAllocator<> &allocator) + void write_data(rapidjson::Value& output, + rapidjson::MemoryPoolAllocator<>& allocator) { add_or_set_member(output, "iterations", this->num_iters, allocator); } diff --git a/benchmark/utils/overhead_linop.hpp b/benchmark/utils/overhead_linop.hpp index 8ade2e74f83..b03de5f2f15 100644 --- a/benchmark/utils/overhead_linop.hpp +++ b/benchmark/utils/overhead_linop.hpp @@ -53,8 +53,8 @@ 
namespace overhead { static volatile std::uintptr_t val_operation_##_num = 0; \ template \ void operation##_num(std::shared_ptr exec, \ - const matrix::Dense<_type> *b, \ - matrix::Dense<_type> *x) \ + const matrix::Dense<_type>* b, \ + matrix::Dense<_type>* x) \ { \ val_operation_##_num = reinterpret_cast(x); \ } @@ -169,7 +169,7 @@ class Overhead : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override + void apply_impl(const LinOp* b, LinOp* x) const override { using Vector = matrix::Dense; @@ -186,8 +186,8 @@ class Overhead : public EnableLinOp>, exec->run(overhead::make_operation4(dense_b, dense_x)); } - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override { auto dense_x = as>(x); @@ -201,7 +201,7 @@ class Overhead : public EnableLinOp>, : EnableLinOp(std::move(exec)) {} - explicit Overhead(const Factory *factory, + explicit Overhead(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), transpose(system_matrix->get_size())), diff --git a/benchmark/utils/preconditioners.hpp b/benchmark/utils/preconditioners.hpp index b9f08348b6d..138dfa4cc0b 100644 --- a/benchmark/utils/preconditioners.hpp +++ b/benchmark/utils/preconditioners.hpp @@ -82,7 +82,7 @@ DEFINE_uint32(jacobi_max_block_size, 32, // parses the Jacobi storage optimization command line argument -gko::precision_reduction parse_storage_optimization(const std::string &flag) +gko::precision_reduction parse_storage_optimization(const std::string& flag) { if (flag == "autodetect") { return gko::precision_reduction::autodetect(); diff --git a/benchmark/utils/spmv_common.hpp b/benchmark/utils/spmv_common.hpp index 4fd4ff21d95..c06dc80dc8b 100644 --- a/benchmark/utils/spmv_common.hpp +++ b/benchmark/utils/spmv_common.hpp @@ -63,7 +63,7 @@ OF THIS SOFTWARE, EVEN 
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @param value the JSON value to test. */ -void validate_option_object(const rapidjson::Value &value) +void validate_option_object(const rapidjson::Value& value) { if (!value.IsObject() || !value.HasMember("filename") || !value["filename"].IsString()) { diff --git a/common/cuda_hip/components/absolute_array.hpp.inc b/common/cuda_hip/components/absolute_array.hpp.inc index 8c82324175d..e487320cf07 100644 --- a/common/cuda_hip/components/absolute_array.hpp.inc +++ b/common/cuda_hip/components/absolute_array.hpp.inc @@ -36,7 +36,7 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void inplace_absolute_array_kernel( - const size_type n, ValueType *__restrict__ array) + const size_type n, ValueType* __restrict__ array) { const auto tidx = thread::get_thread_id_flat(); if (tidx < n) { @@ -48,8 +48,8 @@ __global__ template __global__ __launch_bounds__(default_block_size) void outplace_absolute_array_kernel( - const size_type n, const ValueType *__restrict__ in, - remove_complex *__restrict__ out) + const size_type n, const ValueType* __restrict__ in, + remove_complex* __restrict__ out) { const auto tidx = thread::get_thread_id_flat(); if (tidx < n) { diff --git a/common/cuda_hip/components/atomic.hpp.inc b/common/cuda_hip/components/atomic.hpp.inc index a1c514f8a55..e355ad05b72 100644 --- a/common/cuda_hip/components/atomic.hpp.inc +++ b/common/cuda_hip/components/atomic.hpp.inc @@ -36,7 +36,7 @@ namespace detail { template struct atomic_helper { - __forceinline__ __device__ static ValueType atomic_add(ValueType *, + __forceinline__ __device__ static ValueType atomic_add(ValueType*, ValueType) { static_assert(sizeof(ValueType) == 0, @@ -44,7 +44,7 @@ struct atomic_helper { "specializations are."); // TODO: add proper implementation of generic atomic add } - __forceinline__ __device__ static ValueType atomic_max(ValueType *, + __forceinline__ __device__ static ValueType atomic_max(ValueType*, 
ValueType) { static_assert(sizeof(ValueType) == 0, @@ -61,7 +61,7 @@ __forceinline__ __device__ ResultType reinterpret(ValueType val) static_assert(sizeof(ValueType) == sizeof(ResultType), "The type to reinterpret to must be of the same size as the " "original type."); - return reinterpret_cast(val); + return reinterpret_cast(val); } @@ -71,22 +71,22 @@ __forceinline__ __device__ ResultType reinterpret(ValueType val) ValueType, \ std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ __forceinline__ __device__ static ValueType atomic_add( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ using c_type = CONVERTER_TYPE; \ - return atomic_wrapper(addr, [&val](c_type &old, c_type assumed, \ - c_type *c_addr) { \ + return atomic_wrapper(addr, [&val](c_type& old, c_type assumed, \ + c_type* c_addr) { \ old = atomicCAS(c_addr, assumed, \ reinterpret( \ val + reinterpret(assumed))); \ }); \ } \ __forceinline__ __device__ static ValueType atomic_max( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ using c_type = CONVERTER_TYPE; \ return atomic_wrapper( \ - addr, [&val](c_type &old, c_type assumed, c_type *c_addr) { \ + addr, [&val](c_type& old, c_type assumed, c_type* c_addr) { \ if (reinterpret(assumed) < val) { \ old = atomicCAS(c_addr, assumed, \ reinterpret(val)); \ @@ -97,10 +97,10 @@ __forceinline__ __device__ ResultType reinterpret(ValueType val) private: \ template \ __forceinline__ __device__ static ValueType atomic_wrapper( \ - ValueType *__restrict__ addr, Callable set_old) \ + ValueType* __restrict__ addr, Callable set_old) \ { \ - CONVERTER_TYPE *address_as_converter = \ - reinterpret_cast(addr); \ + CONVERTER_TYPE* address_as_converter = \ + reinterpret_cast(addr); \ CONVERTER_TYPE old = *address_as_converter; \ CONVERTER_TYPE assumed; \ do { \ @@ -129,7 +129,7 @@ GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned short int); template 
-__forceinline__ __device__ T atomic_add(T *__restrict__ addr, T val) +__forceinline__ __device__ T atomic_add(T* __restrict__ addr, T val) { return detail::atomic_helper::atomic_add(addr, val); } @@ -137,7 +137,7 @@ __forceinline__ __device__ T atomic_add(T *__restrict__ addr, T val) #define GKO_BIND_ATOMIC_ADD(ValueType) \ __forceinline__ __device__ ValueType atomic_add( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ return atomicAdd(addr, val); \ } @@ -186,7 +186,7 @@ GKO_BIND_ATOMIC_ADD(__half2); #undef GKO_BIND_ATOMIC_ADD template -__forceinline__ __device__ T atomic_max(T *__restrict__ addr, T val) +__forceinline__ __device__ T atomic_max(T* __restrict__ addr, T val) { return detail::atomic_helper::atomic_max(addr, val); } @@ -194,7 +194,7 @@ __forceinline__ __device__ T atomic_max(T *__restrict__ addr, T val) #define GKO_BIND_ATOMIC_MAX(ValueType) \ __forceinline__ __device__ ValueType atomic_max( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ return atomicMax(addr, val); \ } diff --git a/common/cuda_hip/components/diagonal_block_manipulation.hpp.inc b/common/cuda_hip/components/diagonal_block_manipulation.hpp.inc index baf991550e8..40ba5d030a6 100644 --- a/common/cuda_hip/components/diagonal_block_manipulation.hpp.inc +++ b/common/cuda_hip/components/diagonal_block_manipulation.hpp.inc @@ -41,13 +41,13 @@ template < typename IndexType, typename = std::enable_if_t::value>> __device__ __forceinline__ void extract_transposed_diag_blocks( - const Group &group, int processed_blocks, - const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, - const IndexType *__restrict__ block_ptrs, size_type num_blocks, - ValueType *__restrict__ block_row, int increment, - ValueType *__restrict__ workspace) + const Group& group, int processed_blocks, + const IndexType* __restrict__ row_ptrs, + const 
IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, + const IndexType* __restrict__ block_ptrs, size_type num_blocks, + ValueType* __restrict__ block_row, int increment, + ValueType* __restrict__ workspace) { const int tid = threadIdx.y * blockDim.x + threadIdx.x; const auto warp = group::tiled_partition(group); diff --git a/common/cuda_hip/components/fill_array.hpp.inc b/common/cuda_hip/components/fill_array.hpp.inc index d33350636c3..b352f7d5334 100644 --- a/common/cuda_hip/components/fill_array.hpp.inc +++ b/common/cuda_hip/components/fill_array.hpp.inc @@ -36,7 +36,7 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void fill_array( - size_type n, ValueType *__restrict__ array, ValueType val) + size_type n, ValueType* __restrict__ array, ValueType val) { const auto tidx = thread::get_thread_id_flat(); if (tidx < n) { @@ -47,7 +47,7 @@ __global__ __launch_bounds__(default_block_size) void fill_array( template __global__ __launch_bounds__(default_block_size) void fill_seq_array( - size_type n, ValueType *__restrict__ array) + size_type n, ValueType* __restrict__ array) { const auto tidx = thread::get_thread_id_flat(); if (tidx < n) { diff --git a/common/cuda_hip/components/merging.hpp.inc b/common/cuda_hip/components/merging.hpp.inc index 4ddd68b5beb..ff1749978a9 100644 --- a/common/cuda_hip/components/merging.hpp.inc +++ b/common/cuda_hip/components/merging.hpp.inc @@ -134,9 +134,9 @@ __forceinline__ __device__ detail::merge_result group_merge_step( */ template -__forceinline__ __device__ void group_merge(const ValueType *__restrict__ a, +__forceinline__ __device__ void group_merge(const ValueType* __restrict__ a, IndexType a_size, - const ValueType *__restrict__ b, + const ValueType* __restrict__ b, IndexType b_size, Group group, Callback merge_fn) { @@ -211,9 +211,9 @@ __forceinline__ __device__ void group_merge(const ValueType *__restrict__ a, */ template -__forceinline__ __device__ void group_match(const 
ValueType *__restrict__ a, +__forceinline__ __device__ void group_match(const ValueType* __restrict__ a, IndexType a_size, - const ValueType *__restrict__ b, + const ValueType* __restrict__ b, IndexType b_size, Group group, Callback match_fn) { @@ -247,8 +247,8 @@ __forceinline__ __device__ void group_match(const ValueType *__restrict__ a, */ template __forceinline__ __device__ void sequential_merge( - const ValueType *__restrict__ a, IndexType a_size, - const ValueType *__restrict__ b, IndexType b_size, Callback merge_fn) + const ValueType* __restrict__ a, IndexType a_size, + const ValueType* __restrict__ b, IndexType b_size, Callback merge_fn) { auto c_size = a_size + b_size; IndexType a_begin{}; @@ -293,9 +293,9 @@ __forceinline__ __device__ void sequential_merge( * indices of the matching values from a and b. */ template -__forceinline__ __device__ void sequential_match(const ValueType *a, +__forceinline__ __device__ void sequential_match(const ValueType* a, IndexType a_size, - const ValueType *b, + const ValueType* b, IndexType b_size, Callback match_fn) { diff --git a/common/cuda_hip/components/prefix_sum.hpp.inc b/common/cuda_hip/components/prefix_sum.hpp.inc index 1d57c20b2e5..c0b9f215cb2 100644 --- a/common/cuda_hip/components/prefix_sum.hpp.inc +++ b/common/cuda_hip/components/prefix_sum.hpp.inc @@ -49,8 +49,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ template __forceinline__ __device__ void subwarp_prefix_sum(ValueType element, - ValueType &prefix_sum, - ValueType &total_sum, + ValueType& prefix_sum, + ValueType& total_sum, Group subwarp) { prefix_sum = inclusive ? 
element : zero(); @@ -81,7 +81,7 @@ __forceinline__ __device__ void subwarp_prefix_sum(ValueType element, */ template __forceinline__ __device__ void subwarp_prefix_sum(ValueType element, - ValueType &prefix_sum, + ValueType& prefix_sum, Group subwarp) { ValueType tmp{}; @@ -107,8 +107,8 @@ __forceinline__ __device__ void subwarp_prefix_sum(ValueType element, */ template __global__ __launch_bounds__(block_size) void start_prefix_sum( - size_type num_elements, ValueType *__restrict__ elements, - ValueType *__restrict__ block_sum) + size_type num_elements, ValueType* __restrict__ elements, + ValueType* __restrict__ block_sum) { const auto tidx = thread::get_thread_id_flat(); const auto element_id = threadIdx.x; @@ -174,8 +174,8 @@ __global__ __launch_bounds__(block_size) void start_prefix_sum( */ template __global__ __launch_bounds__(block_size) void finalize_prefix_sum( - size_type num_elements, ValueType *__restrict__ elements, - const ValueType *__restrict__ block_sum) + size_type num_elements, ValueType* __restrict__ elements, + const ValueType* __restrict__ block_sum) { const auto tidx = thread::get_thread_id_flat(); diff --git a/common/cuda_hip/components/reduction.hpp.inc b/common/cuda_hip/components/reduction.hpp.inc index bbdaf1217ae..9b4ed4cc8c7 100644 --- a/common/cuda_hip/components/reduction.hpp.inc +++ b/common/cuda_hip/components/reduction.hpp.inc @@ -46,7 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template < typename Group, typename ValueType, typename Operator, typename = std::enable_if_t::value>> -__device__ __forceinline__ ValueType reduce(const Group &group, +__device__ __forceinline__ ValueType reduce(const Group& group, ValueType local_data, Operator reduce_op = Operator{}) { @@ -70,7 +70,7 @@ __device__ __forceinline__ ValueType reduce(const Group &group, template < typename Group, typename ValueType, typename = std::enable_if_t::value>> -__device__ __forceinline__ int choose_pivot(const Group &group, +__device__ __forceinline__ int choose_pivot(const Group& group, ValueType local_data, bool is_pivoted) { @@ -103,8 +103,8 @@ __device__ __forceinline__ int choose_pivot(const Group &group, template < typename Group, typename ValueType, typename Operator, typename = std::enable_if_t::value>> -__device__ void reduce(const Group &__restrict__ group, - ValueType *__restrict__ data, +__device__ void reduce(const Group& __restrict__ group, + ValueType* __restrict__ data, Operator reduce_op = Operator{}) { const auto local_id = group.thread_rank(); @@ -144,8 +144,8 @@ __device__ void reduce(const Group &__restrict__ group, template < typename Group, typename ValueType, typename Operator, typename = xstd::enable_if_t::value>> -__device__ void multireduce(const Group &__restrict__ group, - ValueType *__restrict__ data, size_type stride, +__device__ void multireduce(const Group& __restrict__ group, + ValueType* __restrict__ data, size_type stride, size_type num, Operator reduce_op = Operator{}) { const auto local_id = group.thread_rank(); @@ -185,8 +185,8 @@ __device__ void multireduce(const Group &__restrict__ group, */ template __device__ void reduce_array(size_type size, - const ValueType *__restrict__ source, - ValueType *__restrict__ result, + const ValueType* __restrict__ source, + ValueType* __restrict__ result, Operator reduce_op = Operator{}) { const auto tidx = thread::get_thread_id_flat(); @@ -212,12 +212,12 @@ __device__ void reduce_array(size_type 
size, */ template __global__ __launch_bounds__(default_block_size) void reduce_add_array( - size_type size, const ValueType *__restrict__ source, - ValueType *__restrict__ result) + size_type size, const ValueType* __restrict__ source, + ValueType* __restrict__ result) { __shared__ UninitializedArray block_sum; - reduce_array(size, source, static_cast(block_sum), - [](const ValueType &x, const ValueType &y) { return x + y; }); + reduce_array(size, source, static_cast(block_sum), + [](const ValueType& x, const ValueType& y) { return x + y; }); if (threadIdx.x == 0) { result[blockIdx.x] = block_sum[0]; diff --git a/common/cuda_hip/components/segment_scan.hpp.inc b/common/cuda_hip/components/segment_scan.hpp.inc index f16b3794d4d..8f488d3f4c4 100644 --- a/common/cuda_hip/components/segment_scan.hpp.inc +++ b/common/cuda_hip/components/segment_scan.hpp.inc @@ -40,8 +40,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ template __device__ __forceinline__ bool segment_scan( - const group::thread_block_tile &group, const IndexType ind, - ValueType *__restrict__ val) + const group::thread_block_tile& group, const IndexType ind, + ValueType* __restrict__ val) { bool head = true; #pragma unroll diff --git a/common/cuda_hip/components/sorting.hpp.inc b/common/cuda_hip/components/sorting.hpp.inc index cd772e08adb..33173a5c3a8 100644 --- a/common/cuda_hip/components/sorting.hpp.inc +++ b/common/cuda_hip/components/sorting.hpp.inc @@ -41,7 +41,7 @@ namespace detail { * descending order if `true`. 
*/ template -__forceinline__ __device__ void bitonic_cas(ValueType &a, ValueType &b, +__forceinline__ __device__ void bitonic_cas(ValueType& a, ValueType& b, bool reverse) { auto tmp = a; @@ -66,7 +66,7 @@ struct bitonic_local { "number of elements must be a power of two"); // merges two bitonic sequences els[0, n / 2), els[n / 2, n) - __forceinline__ __host__ __device__ static void merge(ValueType *els, + __forceinline__ __host__ __device__ static void merge(ValueType* els, bool reverse) { auto els_mid = els + (num_elements / 2); @@ -78,7 +78,7 @@ struct bitonic_local { } // sorts an unsorted sequence els [0, n) - __forceinline__ __device__ static void sort(ValueType *els, bool reverse) + __forceinline__ __device__ static void sort(ValueType* els, bool reverse) { auto els_mid = els + (num_elements / 2); // sort first half normally @@ -93,8 +93,8 @@ struct bitonic_local { template struct bitonic_local { // nothing to do for a single element - __forceinline__ __device__ static void merge(ValueType *, bool) {} - __forceinline__ __device__ static void sort(ValueType *, bool) {} + __forceinline__ __device__ static void merge(ValueType*, bool) {} + __forceinline__ __device__ static void sort(ValueType*, bool) {} }; @@ -126,7 +126,7 @@ struct bitonic_warp { return bool(threadIdx.x & (num_threads / 2)); } - __forceinline__ __device__ static void merge(ValueType *els, bool reverse) + __forceinline__ __device__ static void merge(ValueType* els, bool reverse) { auto tile = group::tiled_partition(group::this_thread_block()); @@ -138,7 +138,7 @@ struct bitonic_warp { half::merge(els, reverse); } - __forceinline__ __device__ static void sort(ValueType *els, bool reverse) + __forceinline__ __device__ static void sort(ValueType* els, bool reverse) { auto new_reverse = reverse != upper_half(); half::sort(els, new_reverse); @@ -149,11 +149,11 @@ struct bitonic_warp { template struct bitonic_warp { using local = bitonic_local; - __forceinline__ __device__ static void merge(ValueType 
*els, bool reverse) + __forceinline__ __device__ static void merge(ValueType* els, bool reverse) { local::merge(els, reverse); } - __forceinline__ __device__ static void sort(ValueType *els, bool reverse) + __forceinline__ __device__ static void sort(ValueType* els, bool reverse) { local::sort(els, reverse); } @@ -198,8 +198,8 @@ struct bitonic_global { return bool(rank & (num_groups * num_threads / 2)); } - __forceinline__ __device__ static void merge(ValueType *local_els, - ValueType *shared_els, + __forceinline__ __device__ static void merge(ValueType* local_els, + ValueType* shared_els, bool reverse) { group::this_thread_block().sync(); @@ -214,8 +214,8 @@ struct bitonic_global { half::merge(local_els, shared_els, reverse); } - __forceinline__ __device__ static void sort(ValueType *local_els, - ValueType *shared_els, + __forceinline__ __device__ static void sort(ValueType* local_els, + ValueType* shared_els, bool reverse) { auto new_reverse = reverse != upper_half(); @@ -236,8 +236,8 @@ struct bitonic_global { num_total_threads>::shared_idx(local); } - __forceinline__ __device__ static void merge(ValueType *local_els, - ValueType *shared_els, + __forceinline__ __device__ static void merge(ValueType* local_els, + ValueType* shared_els, bool reverse) { group::this_thread_block().sync(); @@ -250,8 +250,8 @@ struct bitonic_global { } } - __forceinline__ __device__ static void sort(ValueType *local_els, - ValueType *shared_els, + __forceinline__ __device__ static void sort(ValueType* local_els, + ValueType* shared_els, bool reverse) { auto rank = group::this_thread_block().thread_rank(); @@ -295,8 +295,8 @@ struct bitonic_global { * the less-than operator! 
*/ template -__forceinline__ __device__ void bitonic_sort(ValueType *local_elements, - ValueType *shared_elements) +__forceinline__ __device__ void bitonic_sort(ValueType* local_elements, + ValueType* shared_elements) { constexpr auto num_threads = num_elements / num_local; constexpr auto num_warps = num_threads / config::warp_size; diff --git a/common/cuda_hip/components/uninitialized_array.hpp.inc b/common/cuda_hip/components/uninitialized_array.hpp.inc index e951cf06860..e82be53798c 100644 --- a/common/cuda_hip/components/uninitialized_array.hpp.inc +++ b/common/cuda_hip/components/uninitialized_array.hpp.inc @@ -49,7 +49,7 @@ public: * * @return the constexpr pointer to the first entry of the array. */ - constexpr GKO_ATTRIBUTES operator const ValueType *() const noexcept + constexpr GKO_ATTRIBUTES operator const ValueType*() const noexcept { return &(*this)[0]; } @@ -60,7 +60,7 @@ public: * * @return the non-const pointer to the first entry of the array. */ - GKO_ATTRIBUTES operator ValueType *() noexcept { return &(*this)[0]; } + GKO_ATTRIBUTES operator ValueType*() noexcept { return &(*this)[0]; } /** * constexpr array access operator. @@ -70,10 +70,10 @@ public: * * @return a reference to the array entry at the given index. */ - constexpr GKO_ATTRIBUTES const ValueType &operator[](size_type pos) const + constexpr GKO_ATTRIBUTES const ValueType& operator[](size_type pos) const noexcept { - return reinterpret_cast(data_)[pos]; + return reinterpret_cast(data_)[pos]; } /** @@ -84,9 +84,9 @@ public: * * @return a reference to the array entry at the given index. 
*/ - GKO_ATTRIBUTES ValueType &operator[](size_type pos) noexcept + GKO_ATTRIBUTES ValueType& operator[](size_type pos) noexcept { - return reinterpret_cast(data_)[pos]; + return reinterpret_cast(data_)[pos]; } private: diff --git a/common/cuda_hip/components/warp_blas.hpp.inc b/common/cuda_hip/components/warp_blas.hpp.inc index 6c7f608511b..cd904bde1c9 100644 --- a/common/cuda_hip/components/warp_blas.hpp.inc +++ b/common/cuda_hip/components/warp_blas.hpp.inc @@ -56,8 +56,8 @@ template < int max_problem_size, typename Group, typename ValueType, typename = std::enable_if_t::value>> __device__ __forceinline__ void apply_gauss_jordan_transform( - const Group &__restrict__ group, int32 key_row, int32 key_col, - ValueType *__restrict__ row, bool &__restrict__ status) + const Group& __restrict__ group, int32 key_row, int32 key_col, + ValueType* __restrict__ row, bool& __restrict__ status) { auto key_col_elem = group.shfl(row[key_col], key_row); if (key_col_elem == zero()) { @@ -99,9 +99,9 @@ template < int max_problem_size, typename Group, typename ValueType, typename = std::enable_if_t::value>> __device__ __forceinline__ void apply_gauss_jordan_transform_with_rhs( - const Group &__restrict__ group, int32 key_row, int32 key_col, - ValueType *__restrict__ row, ValueType *__restrict__ rhs, - bool &__restrict__ status) + const Group& __restrict__ group, int32 key_row, int32 key_col, + ValueType* __restrict__ row, ValueType* __restrict__ rhs, + bool& __restrict__ status) { auto key_col_elem = group.shfl(row[key_col], key_row); auto key_rhs_elem = group.shfl(rhs[0], key_row); @@ -164,11 +164,11 @@ __device__ __forceinline__ void apply_gauss_jordan_transform_with_rhs( template < int max_problem_size, typename Group, typename ValueType, typename = std::enable_if_t::value>> -__device__ __forceinline__ bool invert_block(const Group &__restrict__ group, +__device__ __forceinline__ bool invert_block(const Group& __restrict__ group, uint32 problem_size, - ValueType *__restrict__ 
row, - uint32 &__restrict__ perm, - uint32 &__restrict__ trans_perm) + ValueType* __restrict__ row, + uint32& __restrict__ perm, + uint32& __restrict__ trans_perm) { GKO_ASSERT(problem_size <= max_problem_size); // prevent rows after problem_size to become pivots @@ -257,9 +257,9 @@ template < typename Group, typename SourceValueType, typename ResultValueType, typename = std::enable_if_t::value>> __device__ __forceinline__ void copy_matrix( - const Group &__restrict__ group, uint32 problem_size, - const SourceValueType *__restrict__ source_row, uint32 increment, - uint32 row_perm, uint32 col_perm, ResultValueType *__restrict__ destination, + const Group& __restrict__ group, uint32 problem_size, + const SourceValueType* __restrict__ source_row, uint32 increment, + uint32 row_perm, uint32 col_perm, ResultValueType* __restrict__ destination, size_type stride) { GKO_ASSERT(problem_size <= max_problem_size); @@ -310,10 +310,10 @@ template < typename VectorValueType, typename = std::enable_if_t::value>> __device__ __forceinline__ void multiply_transposed_vec( - const Group &__restrict__ group, uint32 problem_size, - const VectorValueType &__restrict__ vec, - const MatrixValueType *__restrict__ mtx_row, uint32 mtx_increment, - VectorValueType *__restrict__ res, uint32 res_increment) + const Group& __restrict__ group, uint32 problem_size, + const VectorValueType& __restrict__ vec, + const MatrixValueType* __restrict__ mtx_row, uint32 mtx_increment, + VectorValueType* __restrict__ res, uint32 res_increment) { GKO_ASSERT(problem_size <= max_problem_size); auto mtx_elem = zero(); @@ -370,10 +370,10 @@ template < typename VectorValueType, typename Closure, typename = std::enable_if_t::value>> __device__ __forceinline__ void multiply_vec( - const Group &__restrict__ group, uint32 problem_size, - const VectorValueType &__restrict__ vec, - const MatrixValueType *__restrict__ mtx_row, uint32 mtx_increment, - VectorValueType *__restrict__ res, uint32 res_increment, Closure 
closure_op) + const Group& __restrict__ group, uint32 problem_size, + const VectorValueType& __restrict__ vec, + const MatrixValueType* __restrict__ mtx_row, uint32 mtx_increment, + VectorValueType* __restrict__ res, uint32 res_increment, Closure closure_op) { GKO_ASSERT(problem_size <= max_problem_size); auto mtx_elem = zero(); @@ -418,7 +418,7 @@ template < int max_problem_size, typename Group, typename ValueType, typename = std::enable_if_t::value>> __device__ __forceinline__ remove_complex compute_infinity_norm( - const Group &group, uint32 num_rows, uint32 num_cols, const ValueType *row) + const Group& group, uint32 num_rows, uint32 num_cols, const ValueType* row) { using result_type = remove_complex; auto sum = zero(); diff --git a/common/cuda_hip/factorization/factorization_kernels.hpp.inc b/common/cuda_hip/factorization/factorization_kernels.hpp.inc index 80178bc76a7..36b09c72168 100644 --- a/common/cuda_hip/factorization/factorization_kernels.hpp.inc +++ b/common/cuda_hip/factorization/factorization_kernels.hpp.inc @@ -41,8 +41,8 @@ template struct find_helper { template static __forceinline__ __device__ bool find(Group subwarp_grp, - const IndexType *first, - const IndexType *last, + const IndexType* first, + const IndexType* last, IndexType value) { auto subwarp_idx = subwarp_grp.thread_rank(); @@ -66,8 +66,8 @@ template <> struct find_helper { template static __forceinline__ __device__ bool find(Group subwarp_grp, - const IndexType *first, - const IndexType *last, + const IndexType* first, + const IndexType* last, IndexType value) { const auto length = static_cast(last - first); @@ -88,10 +88,10 @@ template __global__ __launch_bounds__(default_block_size) void find_missing_diagonal_elements( IndexType num_rows, IndexType num_cols, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, - IndexType *__restrict__ elements_to_add_per_row, - bool *__restrict__ changes_required) + const IndexType* __restrict__ col_idxs, + const 
IndexType* __restrict__ row_ptrs, + IndexType* __restrict__ elements_to_add_per_row, + bool* __restrict__ changes_required) { const auto total_subwarp_count = thread::get_subwarp_num_flat(); @@ -110,8 +110,8 @@ __global__ } continue; } - const auto *start_cols = col_idxs + row_ptrs[row]; - const auto *end_cols = col_idxs + row_ptrs[row + 1]; + const auto* start_cols = col_idxs + row_ptrs[row]; + const auto* end_cols = col_idxs + row_ptrs[row + 1]; if (detail::find_helper::find(subwarp_grp, start_cols, end_cols, row)) { if (subwarp_idx == 0) { @@ -136,12 +136,12 @@ __global__ template __global__ __launch_bounds__(default_block_size) void add_missing_diagonal_elements( - IndexType num_rows, const ValueType *__restrict__ old_values, - const IndexType *__restrict__ old_col_idxs, - const IndexType *__restrict__ old_row_ptrs, - ValueType *__restrict__ new_values, - IndexType *__restrict__ new_col_idxs, - const IndexType *__restrict__ row_ptrs_addition) + IndexType num_rows, const ValueType* __restrict__ old_values, + const IndexType* __restrict__ old_col_idxs, + const IndexType* __restrict__ old_row_ptrs, + ValueType* __restrict__ new_values, + IndexType* __restrict__ new_col_idxs, + const IndexType* __restrict__ row_ptrs_addition) { // Precaution in case not enough threads were created const auto total_subwarp_count = @@ -218,8 +218,8 @@ __global__ template __global__ __launch_bounds__(default_block_size) void update_row_ptrs( - IndexType num_rows, IndexType *__restrict__ row_ptrs, - IndexType *__restrict__ row_ptr_addition) + IndexType num_rows, IndexType* __restrict__ row_ptrs, + IndexType* __restrict__ row_ptr_addition) { const auto total_thread_count = thread::get_thread_num_flat(); const auto begin_row = thread::get_thread_id_flat(); @@ -232,10 +232,10 @@ __global__ __launch_bounds__(default_block_size) void update_row_ptrs( template __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_u_row( - size_type num_rows, const IndexType *__restrict__ 
row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, IndexType *__restrict__ l_nnz_row, - IndexType *__restrict__ u_nnz_row) + size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, IndexType* __restrict__ l_nnz_row, + IndexType* __restrict__ u_nnz_row) { const auto row = thread::get_thread_id_flat(); if (row < num_rows) { @@ -256,13 +256,13 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_u_row( template __global__ __launch_bounds__(default_block_size) void initialize_l_u( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, - const IndexType *__restrict__ l_row_ptrs, - IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_values, - const IndexType *__restrict__ u_row_ptrs, - IndexType *__restrict__ u_col_idxs, ValueType *__restrict__ u_values) + size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, + const IndexType* __restrict__ l_row_ptrs, + IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_values, + const IndexType* __restrict__ u_row_ptrs, + IndexType* __restrict__ u_col_idxs, ValueType* __restrict__ u_values) { const auto row = thread::get_thread_id_flat(); if (row < num_rows) { @@ -301,9 +301,9 @@ __global__ __launch_bounds__(default_block_size) void initialize_l_u( template __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_row( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, IndexType *__restrict__ l_nnz_row) + size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, IndexType* __restrict__ l_nnz_row) { const auto 
row = thread::get_thread_id_flat(); if (row < num_rows) { @@ -321,11 +321,11 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_l_row( template __global__ __launch_bounds__(default_block_size) void initialize_l( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, - const IndexType *__restrict__ l_row_ptrs, - IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_values, + size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, + const IndexType* __restrict__ l_row_ptrs, + IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_values, bool use_sqrt) { const auto row = thread::get_thread_id_flat(); diff --git a/common/cuda_hip/factorization/par_ic_kernels.hpp.inc b/common/cuda_hip/factorization/par_ic_kernels.hpp.inc index f83ab19c8ba..79fcf144721 100644 --- a/common/cuda_hip/factorization/par_ic_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ic_kernels.hpp.inc @@ -35,7 +35,7 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void ic_init( - const IndexType *__restrict__ l_row_ptrs, ValueType *__restrict__ l_vals, + const IndexType* __restrict__ l_row_ptrs, ValueType* __restrict__ l_vals, size_type num_rows) { auto row = thread::get_thread_id_flat(); @@ -54,11 +54,11 @@ __global__ __launch_bounds__(default_block_size) void ic_init( template __global__ __launch_bounds__(default_block_size) void ic_sweep( - const IndexType *__restrict__ a_row_idxs, - const IndexType *__restrict__ a_col_idxs, - const ValueType *__restrict__ a_vals, - const IndexType *__restrict__ l_row_ptrs, - const IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_vals, + const IndexType* __restrict__ a_row_idxs, + const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, + const IndexType* __restrict__ l_row_ptrs, + 
const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, IndexType l_nnz) { const auto l_nz = thread::get_thread_id_flat(); diff --git a/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc b/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc index 5e1dea39544..5b3e58635ff 100644 --- a/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ict_spgeam_kernels.hpp.inc @@ -35,11 +35,11 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_nnz( - const IndexType *__restrict__ llh_row_ptrs, - const IndexType *__restrict__ llh_col_idxs, - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - IndexType *__restrict__ l_new_row_ptrs, IndexType num_rows) + const IndexType* __restrict__ llh_row_ptrs, + const IndexType* __restrict__ llh_col_idxs, + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + IndexType* __restrict__ l_new_row_ptrs, IndexType num_rows) { auto subwarp = group::tiled_partition(group::this_thread_block()); @@ -72,17 +72,17 @@ __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_nnz( template __global__ __launch_bounds__(default_block_size) void ict_tri_spgeam_init( - const IndexType *__restrict__ llh_row_ptrs, - const IndexType *__restrict__ llh_col_idxs, - const ValueType *__restrict__ llh_vals, - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - const ValueType *__restrict__ a_vals, - const IndexType *__restrict__ l_row_ptrs, - const IndexType *__restrict__ l_col_idxs, - const ValueType *__restrict__ l_vals, - const IndexType *__restrict__ l_new_row_ptrs, - IndexType *__restrict__ l_new_col_idxs, ValueType *__restrict__ l_new_vals, + const IndexType* __restrict__ llh_row_ptrs, + const IndexType* __restrict__ llh_col_idxs, + const ValueType* __restrict__ llh_vals, + const IndexType* __restrict__ a_row_ptrs, + 
const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, + const IndexType* __restrict__ l_row_ptrs, + const IndexType* __restrict__ l_col_idxs, + const ValueType* __restrict__ l_vals, + const IndexType* __restrict__ l_new_row_ptrs, + IndexType* __restrict__ l_new_col_idxs, ValueType* __restrict__ l_new_vals, IndexType num_rows) { auto subwarp = diff --git a/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc b/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc index 418efef9c5c..c7d4b4c4f99 100644 --- a/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ict_sweep_kernels.hpp.inc @@ -35,12 +35,12 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void ict_sweep( - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - const ValueType *__restrict__ a_vals, - const IndexType *__restrict__ l_row_ptrs, - const IndexType *__restrict__ l_row_idxs, - const IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_vals, + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, + const IndexType* __restrict__ l_row_ptrs, + const IndexType* __restrict__ l_row_idxs, + const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, IndexType l_nnz) { auto l_nz = thread::get_subwarp_id_flat(); diff --git a/common/cuda_hip/factorization/par_ilu_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilu_kernels.hpp.inc index f2fdc1468e1..fee1a14d85d 100644 --- a/common/cuda_hip/factorization/par_ilu_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilu_kernels.hpp.inc @@ -35,13 +35,13 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void compute_l_u_factors( - size_type num_elements, const IndexType *__restrict__ row_idxs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, - const 
IndexType *__restrict__ l_row_ptrs, - const IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_values, - const IndexType *__restrict__ u_row_ptrs, - const IndexType *__restrict__ u_col_idxs, ValueType *__restrict__ u_values) + size_type num_elements, const IndexType* __restrict__ row_idxs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, + const IndexType* __restrict__ l_row_ptrs, + const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_values, + const IndexType* __restrict__ u_row_ptrs, + const IndexType* __restrict__ u_col_idxs, ValueType* __restrict__ u_values) { const auto elem_id = thread::get_thread_id_flat(); if (elem_id < num_elements) { diff --git a/common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc index b5f7d43db67..4ae707ed42c 100644 --- a/common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_filter_kernels.hpp.inc @@ -36,7 +36,7 @@ namespace kernel { template -__device__ void abstract_filter_impl(const IndexType *row_ptrs, +__device__ void abstract_filter_impl(const IndexType* row_ptrs, IndexType num_rows, Predicate pred, BeginCallback begin_cb, StepCallback step_cb, @@ -66,9 +66,9 @@ __device__ void abstract_filter_impl(const IndexType *row_ptrs, template -__device__ void abstract_filter_nnz(const IndexType *__restrict__ row_ptrs, +__device__ void abstract_filter_nnz(const IndexType* __restrict__ row_ptrs, IndexType num_rows, Predicate pred, - IndexType *__restrict__ nnz) + IndexType* __restrict__ nnz) { IndexType count{}; abstract_filter_impl( @@ -86,14 +86,14 @@ __device__ void abstract_filter_nnz(const IndexType *__restrict__ row_ptrs, template -__device__ void abstract_filter(const IndexType *__restrict__ old_row_ptrs, - const IndexType *__restrict__ old_col_idxs, - const ValueType *__restrict__ old_vals, +__device__ void abstract_filter(const IndexType* 
__restrict__ old_row_ptrs, + const IndexType* __restrict__ old_col_idxs, + const ValueType* __restrict__ old_vals, IndexType num_rows, Predicate pred, - const IndexType *__restrict__ new_row_ptrs, - IndexType *__restrict__ new_row_idxs, - IndexType *__restrict__ new_col_idxs, - ValueType *__restrict__ new_vals) + const IndexType* __restrict__ new_row_ptrs, + IndexType* __restrict__ new_row_idxs, + IndexType* __restrict__ new_col_idxs, + ValueType* __restrict__ new_vals) { IndexType count{}; IndexType new_offset{}; @@ -121,9 +121,9 @@ __device__ void abstract_filter(const IndexType *__restrict__ old_row_ptrs, template __global__ __launch_bounds__(default_block_size) void threshold_filter_nnz( - const IndexType *__restrict__ row_ptrs, const ValueType *vals, + const IndexType* __restrict__ row_ptrs, const ValueType* vals, IndexType num_rows, remove_complex threshold, - IndexType *__restrict__ nnz, bool lower) + IndexType* __restrict__ nnz, bool lower) { abstract_filter_nnz( row_ptrs, num_rows, @@ -137,13 +137,13 @@ __global__ __launch_bounds__(default_block_size) void threshold_filter_nnz( template __global__ __launch_bounds__(default_block_size) void threshold_filter( - const IndexType *__restrict__ old_row_ptrs, - const IndexType *__restrict__ old_col_idxs, - const ValueType *__restrict__ old_vals, IndexType num_rows, + const IndexType* __restrict__ old_row_ptrs, + const IndexType* __restrict__ old_col_idxs, + const ValueType* __restrict__ old_vals, IndexType num_rows, remove_complex threshold, - const IndexType *__restrict__ new_row_ptrs, - IndexType *__restrict__ new_row_idxs, IndexType *__restrict__ new_col_idxs, - ValueType *__restrict__ new_vals, bool lower) + const IndexType* __restrict__ new_row_ptrs, + IndexType* __restrict__ new_row_idxs, IndexType* __restrict__ new_col_idxs, + ValueType* __restrict__ new_vals, bool lower) { abstract_filter( old_row_ptrs, old_col_idxs, old_vals, num_rows, @@ -157,8 +157,8 @@ __global__ __launch_bounds__(default_block_size) 
void threshold_filter( template __global__ __launch_bounds__(default_block_size) void bucket_filter_nnz( - const IndexType *__restrict__ row_ptrs, const BucketType *buckets, - IndexType num_rows, BucketType bucket, IndexType *__restrict__ nnz) + const IndexType* __restrict__ row_ptrs, const BucketType* buckets, + IndexType num_rows, BucketType bucket, IndexType* __restrict__ nnz) { abstract_filter_nnz( row_ptrs, num_rows, @@ -172,13 +172,13 @@ __global__ __launch_bounds__(default_block_size) void bucket_filter_nnz( template __global__ __launch_bounds__(default_block_size) void bucket_filter( - const IndexType *__restrict__ old_row_ptrs, - const IndexType *__restrict__ old_col_idxs, - const ValueType *__restrict__ old_vals, const BucketType *buckets, + const IndexType* __restrict__ old_row_ptrs, + const IndexType* __restrict__ old_col_idxs, + const ValueType* __restrict__ old_vals, const BucketType* buckets, IndexType num_rows, BucketType bucket, - const IndexType *__restrict__ new_row_ptrs, - IndexType *__restrict__ new_row_idxs, IndexType *__restrict__ new_col_idxs, - ValueType *__restrict__ new_vals) + const IndexType* __restrict__ new_row_ptrs, + IndexType* __restrict__ new_row_idxs, IndexType* __restrict__ new_col_idxs, + ValueType* __restrict__ new_vals) { abstract_filter( old_row_ptrs, old_col_idxs, old_vals, num_rows, diff --git a/common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc index e443d7b6ba7..6c30718536a 100644 --- a/common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_select_kernels.hpp.inc @@ -53,8 +53,8 @@ constexpr auto basecase_block_size = basecase_size / basecase_local_size; */ template __global__ __launch_bounds__(searchtree_width) void build_searchtree( - const ValueType *__restrict__ input, IndexType size, - remove_complex *__restrict__ tree_output) + const ValueType* __restrict__ input, IndexType size, + 
remove_complex* __restrict__ tree_output) { using AbsType = remove_complex; auto idx = threadIdx.x; @@ -92,9 +92,9 @@ __global__ __launch_bounds__(searchtree_width) void build_searchtree( */ template __global__ __launch_bounds__(default_block_size) void count_buckets( - const ValueType *__restrict__ input, IndexType size, - const remove_complex *__restrict__ tree, IndexType *counter, - unsigned char *oracles, int items_per_thread) + const ValueType* __restrict__ input, IndexType size, + const remove_complex* __restrict__ tree, IndexType* counter, + unsigned char* oracles, int items_per_thread) { // load tree into shared memory, initialize counters __shared__ remove_complex sh_tree[searchtree_inner_size]; @@ -148,7 +148,7 @@ __global__ __launch_bounds__(default_block_size) void count_buckets( */ template __global__ __launch_bounds__(default_block_size) void block_prefix_sum( - IndexType *__restrict__ counters, IndexType *__restrict__ totals, + IndexType* __restrict__ counters, IndexType* __restrict__ totals, IndexType num_blocks) { constexpr auto num_warps = default_block_size / config::warp_size; @@ -225,9 +225,9 @@ __global__ __launch_bounds__(default_block_size) void block_prefix_sum( */ template __global__ __launch_bounds__(default_block_size) void filter_bucket( - const ValueType *__restrict__ input, IndexType size, unsigned char bucket, - const unsigned char *oracles, const IndexType *block_offsets, - remove_complex *__restrict__ output, int items_per_thread) + const ValueType* __restrict__ input, IndexType size, unsigned char bucket, + const unsigned char* oracles, const IndexType* block_offsets, + remove_complex* __restrict__ output, int items_per_thread) { // initialize the counter with the block prefix sum. 
__shared__ IndexType counter; @@ -261,8 +261,8 @@ __global__ __launch_bounds__(default_block_size) void filter_bucket( */ template __global__ __launch_bounds__(basecase_block_size) void basecase_select( - const ValueType *__restrict__ input, IndexType size, IndexType rank, - ValueType *__restrict__ out) + const ValueType* __restrict__ input, IndexType size, IndexType rank, + ValueType* __restrict__ out) { constexpr auto sentinel = device_numeric_limits::inf; ValueType local[basecase_local_size]; @@ -287,7 +287,7 @@ __global__ __launch_bounds__(basecase_block_size) void basecase_select( */ template __global__ __launch_bounds__(config::warp_size) void find_bucket( - IndexType *prefix_sum, IndexType rank) + IndexType* prefix_sum, IndexType rank) { auto warp = group::tiled_partition(group::this_thread_block()); diff --git a/common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc index b8ba3b0171f..068e84ddb4d 100644 --- a/common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_spgeam_kernels.hpp.inc @@ -35,12 +35,12 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void tri_spgeam_nnz( - const IndexType *__restrict__ lu_row_ptrs, - const IndexType *__restrict__ lu_col_idxs, - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - IndexType *__restrict__ l_new_row_ptrs, - IndexType *__restrict__ u_new_row_ptrs, IndexType num_rows) + const IndexType* __restrict__ lu_row_ptrs, + const IndexType* __restrict__ lu_col_idxs, + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + IndexType* __restrict__ l_new_row_ptrs, + IndexType* __restrict__ u_new_row_ptrs, IndexType num_rows) { auto subwarp = group::tiled_partition(group::this_thread_block()); @@ -76,22 +76,22 @@ __global__ __launch_bounds__(default_block_size) void tri_spgeam_nnz( template __global__ 
__launch_bounds__(default_block_size) void tri_spgeam_init( - const IndexType *__restrict__ lu_row_ptrs, - const IndexType *__restrict__ lu_col_idxs, - const ValueType *__restrict__ lu_vals, - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - const ValueType *__restrict__ a_vals, - const IndexType *__restrict__ l_row_ptrs, - const IndexType *__restrict__ l_col_idxs, - const ValueType *__restrict__ l_vals, - const IndexType *__restrict__ u_row_ptrs, - const IndexType *__restrict__ u_col_idxs, - const ValueType *__restrict__ u_vals, - const IndexType *__restrict__ l_new_row_ptrs, - IndexType *__restrict__ l_new_col_idxs, ValueType *__restrict__ l_new_vals, - const IndexType *__restrict__ u_new_row_ptrs, - IndexType *__restrict__ u_new_col_idxs, ValueType *__restrict__ u_new_vals, + const IndexType* __restrict__ lu_row_ptrs, + const IndexType* __restrict__ lu_col_idxs, + const ValueType* __restrict__ lu_vals, + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, + const IndexType* __restrict__ l_row_ptrs, + const IndexType* __restrict__ l_col_idxs, + const ValueType* __restrict__ l_vals, + const IndexType* __restrict__ u_row_ptrs, + const IndexType* __restrict__ u_col_idxs, + const ValueType* __restrict__ u_vals, + const IndexType* __restrict__ l_new_row_ptrs, + IndexType* __restrict__ l_new_col_idxs, ValueType* __restrict__ l_new_vals, + const IndexType* __restrict__ u_new_row_ptrs, + IndexType* __restrict__ u_new_col_idxs, ValueType* __restrict__ u_new_vals, IndexType num_rows) { auto subwarp = diff --git a/common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc b/common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc index 5b78d07f28e..341b0f44ae3 100644 --- a/common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc +++ b/common/cuda_hip/factorization/par_ilut_sweep_kernels.hpp.inc @@ -35,16 +35,16 @@ namespace kernel { template __global__ 
__launch_bounds__(default_block_size) void sweep( - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - const ValueType *__restrict__ a_vals, - const IndexType *__restrict__ l_row_ptrs, - const IndexType *__restrict__ l_row_idxs, - const IndexType *__restrict__ l_col_idxs, ValueType *__restrict__ l_vals, - IndexType l_nnz, const IndexType *__restrict__ u_row_idxs, - const IndexType *__restrict__ u_col_idxs, ValueType *__restrict__ u_vals, - const IndexType *__restrict__ ut_col_ptrs, - const IndexType *__restrict__ ut_row_idxs, ValueType *__restrict__ ut_vals, + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, + const IndexType* __restrict__ l_row_ptrs, + const IndexType* __restrict__ l_row_idxs, + const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, + IndexType l_nnz, const IndexType* __restrict__ u_row_idxs, + const IndexType* __restrict__ u_col_idxs, ValueType* __restrict__ u_vals, + const IndexType* __restrict__ ut_col_ptrs, + const IndexType* __restrict__ ut_row_idxs, ValueType* __restrict__ ut_vals, IndexType u_nnz) { auto tidx = thread::get_subwarp_id_flat(); diff --git a/common/cuda_hip/matrix/coo_kernels.hpp.inc b/common/cuda_hip/matrix/coo_kernels.hpp.inc index 22d8191ab8a..a4cb4b87af1 100644 --- a/common/cuda_hip/matrix/coo_kernels.hpp.inc +++ b/common/cuda_hip/matrix/coo_kernels.hpp.inc @@ -55,11 +55,11 @@ namespace { template __device__ void spmv_kernel(const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType *__restrict__ row, - const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, + const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, 
Closure scale) { ValueType temp_val = zero(); @@ -109,28 +109,28 @@ __device__ void spmv_kernel(const size_type nnz, const size_type num_lines, template __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ val, const IndexType *__restrict__ col, - const IndexType *__restrict__ row, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const ValueType* __restrict__ val, const IndexType* __restrict__ col, + const IndexType* __restrict__ row, const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride) { spmv_kernel(nnz, num_lines, val, col, row, b, b_stride, c, c_stride, - [](const ValueType &x) { return x; }); + [](const ValueType& x) { return x; }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ val, - const IndexType *__restrict__ col, const IndexType *__restrict__ row, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride) + const ValueType* __restrict__ alpha, const ValueType* __restrict__ val, + const IndexType* __restrict__ col, const IndexType* __restrict__ row, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride) { ValueType scale_factor = alpha[0]; spmv_kernel( nnz, num_lines, val, col, row, b, b_stride, c, c_stride, - [&scale_factor](const ValueType &x) { return scale_factor * x; }); + [&scale_factor](const ValueType& x) { return scale_factor * x; }); } @@ -155,12 +155,12 @@ __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( */ template __device__ void spmm_kernel(const size_type nnz, const size_type num_elems, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const 
IndexType *__restrict__ row, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, const size_type num_cols, - const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, Closure scale) { ValueType temp = zero(); @@ -191,29 +191,29 @@ __device__ void spmm_kernel(const size_type nnz, const size_type num_elems, template __global__ __launch_bounds__(spmv_block_size) void abstract_spmm( const size_type nnz, const size_type num_elems, - const ValueType *__restrict__ val, const IndexType *__restrict__ col, - const IndexType *__restrict__ row, const size_type num_cols, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride) + const ValueType* __restrict__ val, const IndexType* __restrict__ col, + const IndexType* __restrict__ row, const size_type num_cols, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride) { spmm_kernel(nnz, num_elems, val, col, row, num_cols, b, b_stride, c, - c_stride, [](const ValueType &x) { return x; }); + c_stride, [](const ValueType& x) { return x; }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_spmm( const size_type nnz, const size_type num_elems, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ val, - const IndexType *__restrict__ col, const IndexType *__restrict__ row, - const size_type num_cols, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const ValueType* __restrict__ alpha, const ValueType* __restrict__ val, + const IndexType* __restrict__ col, const IndexType* __restrict__ row, + const size_type num_cols, const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride) { ValueType 
scale_factor = alpha[0]; spmm_kernel( nnz, num_elems, val, col, row, num_cols, b, b_stride, c, c_stride, - [&scale_factor](const ValueType &x) { return scale_factor * x; }); + [&scale_factor](const ValueType& x) { return scale_factor * x; }); } @@ -225,8 +225,8 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void convert_row_idxs_to_ptrs( - const IndexType *__restrict__ idxs, size_type num_nonzeros, - IndexType *__restrict__ ptrs, size_type length) + const IndexType* __restrict__ idxs, size_type num_nonzeros, + IndexType* __restrict__ ptrs, size_type length) { const auto tidx = thread::get_thread_id_flat(); @@ -248,7 +248,7 @@ __global__ __launch_bounds__(default_block_size) void convert_row_idxs_to_ptrs( template __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ result) + ValueType* __restrict__ result) { const auto tidx_x = threadIdx.x + blockDim.x * blockIdx.x; const auto tidx_y = threadIdx.y + blockDim.y * blockIdx.y; @@ -260,10 +260,10 @@ __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( template __global__ __launch_bounds__(default_block_size) void fill_in_dense( - size_type nnz, const IndexType *__restrict__ row_idxs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, size_type stride, - ValueType *__restrict__ result) + size_type nnz, const IndexType* __restrict__ row_idxs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, size_type stride, + ValueType* __restrict__ result) { const auto tidx = thread::get_thread_id_flat(); if (tidx < nnz) { diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 49c3a138e90..7c8cb7f7a5f 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -42,7 +42,7 @@ __host__ __device__ 
__forceinline__ T ceildivT(T nom, T denom) template __device__ __forceinline__ bool block_segment_scan_reverse( - const IndexType *__restrict__ ind, ValueType *__restrict__ val) + const IndexType* __restrict__ ind, ValueType* __restrict__ val) { bool last = true; const auto reg_ind = ind[threadIdx.x]; @@ -68,9 +68,9 @@ __device__ __forceinline__ bool block_segment_scan_reverse( template __device__ __forceinline__ void find_next_row( const IndexType num_rows, const IndexType data_size, const IndexType ind, - IndexType *__restrict__ row, IndexType *__restrict__ row_end, + IndexType* __restrict__ row, IndexType* __restrict__ row_end, const IndexType row_predict, const IndexType row_predict_end, - const IndexType *__restrict__ row_ptr) + const IndexType* __restrict__ row_ptr) { if (!overflow || ind < data_size) { if (ind >= *row_end) { @@ -91,8 +91,8 @@ __device__ __forceinline__ void find_next_row( template __device__ __forceinline__ void warp_atomic_add( - const group::thread_block_tile &group, bool force_write, - ValueType *__restrict__ val, const IndexType row, ValueType *__restrict__ c, + const group::thread_block_tile& group, bool force_write, + ValueType* __restrict__ val, const IndexType row, ValueType* __restrict__ c, const size_type c_stride, const IndexType column_id, Closure scale) { // do a local scan to avoid atomic collisions @@ -109,14 +109,14 @@ __device__ __forceinline__ void warp_atomic_add( template __device__ __forceinline__ void process_window( - const group::thread_block_tile &group, + const group::thread_block_tile& group, const IndexType num_rows, const IndexType data_size, const IndexType ind, - IndexType *__restrict__ row, IndexType *__restrict__ row_end, - IndexType *__restrict__ nrow, IndexType *__restrict__ nrow_end, - ValueType *__restrict__ temp_val, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, - const size_type b_stride, ValueType 
*__restrict__ c, + IndexType* __restrict__ row, IndexType* __restrict__ row_end, + IndexType* __restrict__ nrow, IndexType* __restrict__ nrow_end, + ValueType* __restrict__ temp_val, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, const IndexType column_id, Closure scale) { const IndexType curr_row = *row; @@ -149,10 +149,10 @@ __device__ __forceinline__ IndexType get_warp_start_idx( template __device__ __forceinline__ void spmv_kernel( const IndexType nwarps, const IndexType num_rows, - const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, Closure scale) + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, Closure scale) { const IndexType warp_idx = blockIdx.x * warps_in_block + threadIdx.y; const IndexType column_id = blockIdx.y; @@ -193,28 +193,28 @@ __device__ __forceinline__ void spmv_kernel( template __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( const IndexType nwarps, const IndexType num_rows, - const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride) + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* 
__restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride) { spmv_kernel(nwarps, num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, - c_stride, [](const ValueType &x) { return x; }); + c_stride, [](const ValueType& x) { return x; }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( const IndexType nwarps, const IndexType num_rows, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride) + const ValueType* __restrict__ alpha, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride) { ValueType scale_factor = alpha[0]; spmv_kernel(nwarps, num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, - c_stride, [&scale_factor](const ValueType &x) { + c_stride, [&scale_factor](const ValueType& x) { return scale_factor * x; }); } @@ -223,8 +223,8 @@ __global__ __launch_bounds__(spmv_block_size) void abstract_spmv( template __forceinline__ __device__ void merge_path_search( const IndexType diagonal, const IndexType a_len, const IndexType b_len, - const IndexType *__restrict__ a, const IndexType offset_b, - IndexType *__restrict__ x, IndexType *__restrict__ y) + const IndexType* __restrict__ a, const IndexType offset_b, + IndexType* __restrict__ x, IndexType* __restrict__ y) { auto x_min = max(diagonal - b_len, zero()); auto x_max = min(diagonal, a_len); @@ -244,9 +244,9 @@ __forceinline__ __device__ void merge_path_search( template __device__ void merge_path_reduce(const IndexType nwarps, - const ValueType *__restrict__ last_val, - const 
IndexType *__restrict__ last_row, - ValueType *__restrict__ c, + const ValueType* __restrict__ last_val, + const IndexType* __restrict__ last_row, + ValueType* __restrict__ c, const size_type c_stride, Alpha_op alpha_op) { const IndexType cache_lines = ceildivT(nwarps, spmv_block_size); @@ -273,8 +273,8 @@ __device__ void merge_path_reduce(const IndexType nwarps, tmp_val[threadIdx.x] = value; tmp_ind[threadIdx.x] = row; group::this_thread_block().sync(); - bool last = block_segment_scan_reverse(static_cast(tmp_ind), - static_cast(tmp_val)); + bool last = block_segment_scan_reverse(static_cast(tmp_ind), + static_cast(tmp_val)); group::this_thread_block().sync(); if (last) { c[row * c_stride] += alpha_op(tmp_val[threadIdx.x]); @@ -285,15 +285,15 @@ __device__ void merge_path_reduce(const IndexType nwarps, template __device__ void merge_path_spmv( - const IndexType num_rows, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, - IndexType *__restrict__ row_out, ValueType *__restrict__ val_out, + const IndexType num_rows, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, + IndexType* __restrict__ row_out, ValueType* __restrict__ val_out, Alpha_op alpha_op, Beta_op beta_op) { - const auto *row_end_ptrs = row_ptrs + 1; + const auto* row_end_ptrs = row_ptrs + 1; const auto nnz = row_ptrs[num_rows]; const IndexType num_merge_items = num_rows + nnz; const auto block_items = spmv_block_size * items_per_thread; @@ -344,9 +344,9 @@ __device__ void merge_path_spmv( } } group::this_thread_block().sync(); - IndexType *tmp_ind = shared_row_ptrs; - 
ValueType *tmp_val = - reinterpret_cast(shared_row_ptrs + spmv_block_size); + IndexType* tmp_ind = shared_row_ptrs; + ValueType* tmp_val = + reinterpret_cast(shared_row_ptrs + spmv_block_size); tmp_val[threadIdx.x] = value; tmp_ind[threadIdx.x] = row_i; group::this_thread_block().sync(); @@ -361,71 +361,71 @@ __device__ void merge_path_spmv( template __global__ __launch_bounds__(spmv_block_size) void abstract_merge_path_spmv( - const IndexType num_rows, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, - IndexType *__restrict__ row_out, ValueType *__restrict__ val_out) + const IndexType num_rows, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, + IndexType* __restrict__ row_out, ValueType* __restrict__ val_out) { merge_path_spmv( num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, - row_out, val_out, [](ValueType &x) { return x; }, - [](ValueType &x) { return zero(); }); + row_out, val_out, [](ValueType& x) { return x; }, + [](ValueType& x) { return zero(); }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_merge_path_spmv( - const IndexType num_rows, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - const ValueType *__restrict__ beta, ValueType *__restrict__ c, - const size_type c_stride, IndexType *__restrict__ row_out, - ValueType *__restrict__ val_out) + const IndexType num_rows, const 
ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + const ValueType* __restrict__ beta, ValueType* __restrict__ c, + const size_type c_stride, IndexType* __restrict__ row_out, + ValueType* __restrict__ val_out) { const auto alpha_val = alpha[0]; const auto beta_val = beta[0]; merge_path_spmv( num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, - row_out, val_out, [&alpha_val](ValueType &x) { return alpha_val * x; }, - [&beta_val](ValueType &x) { return beta_val * x; }); + row_out, val_out, [&alpha_val](ValueType& x) { return alpha_val * x; }, + [&beta_val](ValueType& x) { return beta_val * x; }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_reduce( - const IndexType nwarps, const ValueType *__restrict__ last_val, - const IndexType *__restrict__ last_row, ValueType *__restrict__ c, + const IndexType nwarps, const ValueType* __restrict__ last_val, + const IndexType* __restrict__ last_row, ValueType* __restrict__ c, const size_type c_stride) { merge_path_reduce(nwarps, last_val, last_row, c, c_stride, - [](ValueType &x) { return x; }); + [](ValueType& x) { return x; }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_reduce( - const IndexType nwarps, const ValueType *__restrict__ last_val, - const IndexType *__restrict__ last_row, const ValueType *__restrict__ alpha, - ValueType *__restrict__ c, const size_type c_stride) + const IndexType nwarps, const ValueType* __restrict__ last_val, + const IndexType* __restrict__ last_row, const ValueType* __restrict__ alpha, + ValueType* __restrict__ c, const size_type c_stride) { const auto alpha_val = alpha[0]; merge_path_reduce(nwarps, last_val, last_row, c, c_stride, - [&alpha_val](ValueType &x) { return alpha_val * x; }); + [&alpha_val](ValueType& x) { return alpha_val 
* x; }); } template __device__ void device_classical_spmv(const size_type num_rows, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, - const ValueType *__restrict__ b, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, + const ValueType* __restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, + ValueType* __restrict__ c, const size_type c_stride, Closure scale) { auto subwarp_tile = @@ -443,7 +443,7 @@ __device__ void device_classical_spmv(const size_type num_rows, } auto subwarp_result = reduce( subwarp_tile, temp_val, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (subid == 0) { c[row * c_stride + column_id] = scale(subwarp_result, c[row * c_stride + column_id]); @@ -454,31 +454,31 @@ __device__ void device_classical_spmv(const size_type num_rows, template __global__ __launch_bounds__(spmv_block_size) void abstract_classical_spmv( - const size_type num_rows, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const size_type num_rows, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride) { device_classical_spmv( num_rows, val, col_idxs, row_ptrs, b, b_stride, c, c_stride, - [](const ValueType &x, const ValueType &y) { return x; }); + [](const ValueType& x, const ValueType& y) { return x; }); } template __global__ __launch_bounds__(spmv_block_size) void abstract_classical_spmv( - const size_type num_rows, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, const IndexType 
*__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, - const size_type b_stride, const ValueType *__restrict__ beta, - ValueType *__restrict__ c, const size_type c_stride) + const size_type num_rows, const ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const ValueType* __restrict__ b, + const size_type b_stride, const ValueType* __restrict__ beta, + ValueType* __restrict__ c, const size_type c_stride) { const auto alpha_val = alpha[0]; const auto beta_val = beta[0]; device_classical_spmv( num_rows, val, col_idxs, row_ptrs, b, b_stride, c, c_stride, - [&alpha_val, &beta_val](const ValueType &x, const ValueType &y) { + [&alpha_val, &beta_val](const ValueType& x, const ValueType& y) { return alpha_val * x + beta_val * y; }); } @@ -486,11 +486,11 @@ __global__ __launch_bounds__(spmv_block_size) void abstract_classical_spmv( template __global__ __launch_bounds__(default_block_size) void spgeam_nnz( - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - const IndexType *__restrict__ b_row_ptrs, - const IndexType *__restrict__ b_col_idxs, IndexType num_rows, - IndexType *__restrict__ nnz) + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + const IndexType* __restrict__ b_row_ptrs, + const IndexType* __restrict__ b_col_idxs, IndexType num_rows, + IndexType* __restrict__ nnz) { const auto row = thread::get_subwarp_id_flat(); auto subwarp = @@ -520,15 +520,15 @@ __global__ __launch_bounds__(default_block_size) void spgeam_nnz( template __global__ __launch_bounds__(default_block_size) void spgeam( - const ValueType *__restrict__ palpha, - const IndexType *__restrict__ a_row_ptrs, - const IndexType *__restrict__ a_col_idxs, - const ValueType *__restrict__ a_vals, const ValueType *__restrict__ pbeta, - const IndexType *__restrict__ b_row_ptrs, - const 
IndexType *__restrict__ b_col_idxs, - const ValueType *__restrict__ b_vals, IndexType num_rows, - const IndexType *__restrict__ c_row_ptrs, - IndexType *__restrict__ c_col_idxs, ValueType *__restrict__ c_vals) + const ValueType* __restrict__ palpha, + const IndexType* __restrict__ a_row_ptrs, + const IndexType* __restrict__ a_col_idxs, + const ValueType* __restrict__ a_vals, const ValueType* __restrict__ pbeta, + const IndexType* __restrict__ b_row_ptrs, + const IndexType* __restrict__ b_col_idxs, + const ValueType* __restrict__ b_vals, IndexType num_rows, + const IndexType* __restrict__ c_row_ptrs, + IndexType* __restrict__ c_col_idxs, ValueType* __restrict__ c_vals) { const auto row = thread::get_subwarp_id_flat(); auto subwarp = @@ -587,8 +587,8 @@ __global__ __launch_bounds__(default_block_size) void spgeam( template __global__ __launch_bounds__(default_block_size) void convert_row_ptrs_to_idxs( - size_type num_rows, const IndexType *__restrict__ ptrs, - IndexType *__restrict__ idxs) + size_type num_rows, const IndexType* __restrict__ ptrs, + IndexType* __restrict__ idxs) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -602,7 +602,7 @@ __global__ __launch_bounds__(default_block_size) void convert_row_ptrs_to_idxs( template __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ result) + ValueType* __restrict__ result) { const auto tidx_x = threadIdx.x + blockDim.x * blockIdx.x; const auto tidx_y = threadIdx.y + blockDim.y * blockIdx.y; @@ -614,10 +614,10 @@ __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( template __global__ __launch_bounds__(default_block_size) void fill_in_dense( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, size_type stride, - ValueType *__restrict__ result) + size_type 
num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, size_type stride, + ValueType* __restrict__ result) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -630,8 +630,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_dense( template __global__ __launch_bounds__(default_block_size) void calculate_nnz_per_row( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - size_type *__restrict__ nnz_per_row) + size_type num_rows, const IndexType* __restrict__ row_ptrs, + size_type* __restrict__ nnz_per_row) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -642,8 +642,8 @@ __global__ __launch_bounds__(default_block_size) void calculate_nnz_per_row( __global__ __launch_bounds__(config::warp_size) void calculate_slice_lengths( size_type num_rows, size_type slice_size, size_type stride_factor, - const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ slice_lengths, size_type *__restrict__ slice_sets) + const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ slice_lengths, size_type* __restrict__ slice_sets) { constexpr auto warp_size = config::warp_size; const auto sliceid = blockIdx.x; @@ -662,7 +662,7 @@ __global__ __launch_bounds__(config::warp_size) void calculate_slice_lengths( group::tiled_partition(group::this_thread_block()); auto warp_result = reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0) { auto slice_length = @@ -677,12 +677,12 @@ __global__ __launch_bounds__(config::warp_size) void calculate_slice_lengths( template __global__ __launch_bounds__(default_block_size) void fill_in_sellp( size_type num_rows, size_type slice_size, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_row_ptrs, - const IndexType 
*__restrict__ source_col_idxs, - size_type *__restrict__ slice_lengths, size_type *__restrict__ slice_sets, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values) + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + size_type* __restrict__ slice_lengths, size_type* __restrict__ slice_sets, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) { const auto global_row = thread::get_thread_id_flat(); const auto row = global_row % slice_size; @@ -710,8 +710,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( template __global__ __launch_bounds__(default_block_size) void initialize_zero_ell( - size_type max_nnz_per_row, size_type stride, ValueType *__restrict__ values, - IndexType *__restrict__ col_idxs) + size_type max_nnz_per_row, size_type stride, ValueType* __restrict__ values, + IndexType* __restrict__ col_idxs) { const auto tidx = thread::get_thread_id_flat(); @@ -725,11 +725,11 @@ __global__ __launch_bounds__(default_block_size) void initialize_zero_ell( template __global__ __launch_bounds__(default_block_size) void fill_in_ell( size_type num_rows, size_type stride, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_row_ptrs, - const IndexType *__restrict__ source_col_idxs, - ValueType *__restrict__ result_values, - IndexType *__restrict__ result_col_idxs) + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + ValueType* __restrict__ result_values, + IndexType* __restrict__ result_col_idxs) { constexpr auto warp_size = config::warp_size; const auto row = thread::get_subwarp_id_flat(); @@ -750,7 +750,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_ell( __global__ __launch_bounds__(default_block_size) void reduce_max_nnz_per_slice( size_type 
num_rows, size_type slice_size, size_type stride_factor, - const size_type *__restrict__ nnz_per_row, size_type *__restrict__ result) + const size_type* __restrict__ nnz_per_row, size_type* __restrict__ result) { constexpr auto warp_size = config::warp_size; auto warp_tile = @@ -768,7 +768,7 @@ __global__ __launch_bounds__(default_block_size) void reduce_max_nnz_per_slice( } auto warp_result = reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0 && warpid < slice_num) { result[warpid] = ceildiv(warp_result, stride_factor) * stride_factor; @@ -777,13 +777,13 @@ __global__ __launch_bounds__(default_block_size) void reduce_max_nnz_per_slice( __global__ __launch_bounds__(default_block_size) void reduce_total_cols( - size_type num_slices, const size_type *__restrict__ max_nnz_per_slice, - size_type *__restrict__ result) + size_type num_slices, const size_type* __restrict__ max_nnz_per_slice, + size_type* __restrict__ result) { __shared__ size_type block_result[default_block_size]; reduce_array(num_slices, max_nnz_per_slice, block_result, - [](const size_type &x, const size_type &y) { return x + y; }); + [](const size_type& x, const size_type& y) { return x + y; }); if (threadIdx.x == 0) { result[blockIdx.x] = block_result[0]; @@ -792,14 +792,14 @@ __global__ __launch_bounds__(default_block_size) void reduce_total_cols( __global__ __launch_bounds__(default_block_size) void reduce_max_nnz( - size_type size, const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ result) + size_type size, const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ result) { __shared__ size_type block_max[default_block_size]; reduce_array( size, nnz_per_row, block_max, - [](const size_type &x, const size_type &y) { return max(x, y); }); + [](const size_type& x, const size_type& y) { return max(x, y); }); if (threadIdx.x == 0) { 
result[blockIdx.x] = block_max[0]; @@ -811,8 +811,8 @@ template __global__ __launch_bounds__(default_block_size) void calculate_hybrid_coo_row_nnz( size_type num_rows, size_type ell_max_nnz_per_row, - IndexType *__restrict__ csr_row_idxs, - size_type *__restrict__ coo_row_nnz) + IndexType* __restrict__ csr_row_idxs, + size_type* __restrict__ coo_row_nnz) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -826,15 +826,15 @@ __global__ template __global__ __launch_bounds__(default_block_size) void fill_in_hybrid( size_type num_rows, size_type stride, size_type ell_max_nnz_per_row, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_row_ptrs, - const IndexType *__restrict__ source_col_idxs, - const size_type *__restrict__ coo_offset, - ValueType *__restrict__ result_ell_val, - IndexType *__restrict__ result_ell_col, - ValueType *__restrict__ result_coo_val, - IndexType *__restrict__ result_coo_col, - IndexType *__restrict__ result_coo_row) + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + const size_type* __restrict__ coo_offset, + ValueType* __restrict__ result_ell_val, + IndexType* __restrict__ result_ell_col, + ValueType* __restrict__ result_coo_val, + IndexType* __restrict__ result_coo_col, + IndexType* __restrict__ result_coo_row) { constexpr auto warp_size = config::warp_size; const auto row = thread::get_subwarp_id_flat(); @@ -863,8 +863,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_hybrid( template __global__ __launch_bounds__(default_block_size) void check_unsorted( - const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, IndexType num_rows, bool *flag) + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, IndexType num_rows, bool* flag) { __shared__ bool sh_flag; auto block = group::this_thread_block(); @@ -894,9 +894,9 @@ 
__global__ __launch_bounds__(default_block_size) void check_unsorted( template __global__ __launch_bounds__(default_block_size) void extract_diagonal( size_type diag_size, size_type nnz, - const ValueType *__restrict__ orig_values, - const IndexType *__restrict__ orig_row_ptrs, - const IndexType *__restrict__ orig_col_idxs, ValueType *__restrict__ diag) + const ValueType* __restrict__ orig_values, + const IndexType* __restrict__ orig_row_ptrs, + const IndexType* __restrict__ orig_col_idxs, ValueType* __restrict__ diag) { constexpr auto warp_size = config::warp_size; const auto row = thread::get_subwarp_id_flat(); @@ -925,7 +925,7 @@ namespace { template __global__ __launch_bounds__(default_block_size) void conjugate_kernel( - size_type num_nonzeros, ValueType *__restrict__ val) + size_type num_nonzeros, ValueType* __restrict__ val) { const auto tidx = thread::get_thread_id_flat(); @@ -940,8 +940,8 @@ __global__ __launch_bounds__(default_block_size) void conjugate_kernel( template __global__ __launch_bounds__(default_block_size) void row_ptr_permute_kernel( - size_type num_rows, const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, IndexType *__restrict__ out_nnz) + size_type num_rows, const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, IndexType* __restrict__ out_nnz) { auto tid = thread::get_thread_id_flat(); if (tid >= num_rows) { @@ -956,9 +956,9 @@ __global__ __launch_bounds__(default_block_size) void row_ptr_permute_kernel( template __global__ __launch_bounds__(default_block_size) void inv_row_ptr_permute_kernel( - size_type num_rows, const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - IndexType *__restrict__ out_nnz) + size_type num_rows, const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + IndexType* __restrict__ out_nnz) { auto tid = thread::get_thread_id_flat(); if (tid >= num_rows) { @@ -972,12 +972,12 @@ 
__global__ template __global__ __launch_bounds__(default_block_size) void row_permute_kernel( - size_type num_rows, const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - const IndexType *__restrict__ in_cols, - const ValueType *__restrict__ in_vals, - const IndexType *__restrict__ out_row_ptrs, - IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) + size_type num_rows, const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) { auto tid = thread::get_subwarp_id_flat(); if (tid >= num_rows) { @@ -998,12 +998,12 @@ __global__ __launch_bounds__(default_block_size) void row_permute_kernel( template __global__ __launch_bounds__(default_block_size) void inv_row_permute_kernel( - size_type num_rows, const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - const IndexType *__restrict__ in_cols, - const ValueType *__restrict__ in_vals, - const IndexType *__restrict__ out_row_ptrs, - IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) + size_type num_rows, const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) { auto tid = thread::get_subwarp_id_flat(); if (tid >= num_rows) { @@ -1024,12 +1024,12 @@ __global__ __launch_bounds__(default_block_size) void inv_row_permute_kernel( template __global__ __launch_bounds__(default_block_size) void inv_symm_permute_kernel( - size_type num_rows, const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - const IndexType *__restrict__ in_cols, - const 
ValueType *__restrict__ in_vals, - const IndexType *__restrict__ out_row_ptrs, - IndexType *__restrict__ out_cols, ValueType *__restrict__ out_vals) + size_type num_rows, const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals) { auto tid = thread::get_subwarp_id_flat(); if (tid >= num_rows) { diff --git a/common/cuda_hip/matrix/dense_kernels.hpp.inc b/common/cuda_hip/matrix/dense_kernels.hpp.inc index c7ebafd0627..9b87c0cd8d3 100644 --- a/common/cuda_hip/matrix/dense_kernels.hpp.inc +++ b/common/cuda_hip/matrix/dense_kernels.hpp.inc @@ -36,7 +36,7 @@ namespace kernel { template __device__ void compute_partial_reduce(size_type num_rows, - OutType *__restrict__ work, + OutType* __restrict__ work, CallableGetValue get_value, CallableReduce reduce_op) { @@ -54,7 +54,7 @@ __device__ void compute_partial_reduce(size_type num_rows, __shared__ UninitializedArray tmp_work; tmp_work[local_id] = tmp; - reduce(group::this_thread_block(), static_cast(tmp_work), + reduce(group::this_thread_block(), static_cast(tmp_work), reduce_op); if (local_id == 0) { @@ -66,8 +66,8 @@ __device__ void compute_partial_reduce(size_type num_rows, template __device__ void finalize_reduce_computation(size_type size, - const ValueType *work, - ValueType *result, + const ValueType* work, + ValueType* result, CallableReduce reduce_op, CallableFinalize finalize_op) { @@ -80,7 +80,7 @@ __device__ void finalize_reduce_computation(size_type size, __shared__ UninitializedArray tmp_work; tmp_work[local_id] = tmp; - reduce(group::this_thread_block(), static_cast(tmp_work), + reduce(group::this_thread_block(), static_cast(tmp_work), reduce_op); if (local_id == 0) { @@ -91,75 +91,75 @@ __device__ void finalize_reduce_computation(size_type size, template __global__ 
__launch_bounds__(block_size) void compute_partial_dot( - size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ work) + size_type num_rows, const ValueType* __restrict__ x, size_type stride_x, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ work) { compute_partial_reduce( num_rows, work, [x, stride_x, y, stride_y](size_type i) { return x[i * stride_x] * y[i * stride_y]; }, - [](const ValueType &x, const ValueType &y) { return x + y; }); + [](const ValueType& x, const ValueType& y) { return x + y; }); } template __global__ __launch_bounds__(block_size) void compute_partial_conj_dot( - size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ work) + size_type num_rows, const ValueType* __restrict__ x, size_type stride_x, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ work) { compute_partial_reduce( num_rows, work, [x, stride_x, y, stride_y](size_type i) { return conj(x[i * stride_x]) * y[i * stride_y]; }, - [](const ValueType &x, const ValueType &y) { return x + y; }); + [](const ValueType& x, const ValueType& y) { return x + y; }); } template __global__ __launch_bounds__(block_size) void finalize_sum_reduce_computation( - size_type size, const ValueType *work, ValueType *result) + size_type size, const ValueType* work, ValueType* result) { finalize_reduce_computation( size, work, result, - [](const ValueType &x, const ValueType &y) { return x + y; }, - [](const ValueType &x) { return x; }); + [](const ValueType& x, const ValueType& y) { return x + y; }, + [](const ValueType& x) { return x; }); } template __global__ __launch_bounds__(block_size) void compute_partial_norm2( - size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, - remove_complex *__restrict__ work) + size_type num_rows, 
const ValueType* __restrict__ x, size_type stride_x, + remove_complex* __restrict__ work) { using norm_type = remove_complex; compute_partial_reduce( num_rows, work, [x, stride_x](size_type i) { return squared_norm(x[i * stride_x]); }, - [](const norm_type &x, const norm_type &y) { return x + y; }); + [](const norm_type& x, const norm_type& y) { return x + y; }); } template __global__ __launch_bounds__(block_size) void finalize_sqrt_reduce_computation( - size_type size, const ValueType *work, ValueType *result) + size_type size, const ValueType* work, ValueType* result) { finalize_reduce_computation( size, work, result, - [](const ValueType &x, const ValueType &y) { return x + y; }, - [](const ValueType &x) { return sqrt(x); }); + [](const ValueType& x, const ValueType& y) { return x + y; }, + [](const ValueType& x) { return sqrt(x); }); } template __global__ __launch_bounds__(default_block_size) void fill_in_coo( size_type num_rows, size_type num_cols, size_type stride, - const size_type *__restrict__ row_ptrs, - const ValueType *__restrict__ source, IndexType *__restrict__ row_idxs, - IndexType *__restrict__ col_idxs, ValueType *__restrict__ values) + const size_type* __restrict__ row_ptrs, + const ValueType* __restrict__ source, IndexType* __restrict__ row_idxs, + IndexType* __restrict__ col_idxs, ValueType* __restrict__ values) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -180,7 +180,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_coo( template __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ work, IndexType *__restrict__ result) + const ValueType* __restrict__ work, IndexType* __restrict__ result) { constexpr auto warp_size = config::warp_size; const auto row_idx = thread::get_subwarp_id_flat(); @@ -196,7 +196,7 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( } 
result[row_idx] = reduce( warp_tile, part_result, - [](const size_type &a, const size_type &b) { return a + b; }); + [](const size_type& a, const size_type& b) { return a + b; }); } } @@ -204,8 +204,8 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( template __global__ __launch_bounds__(default_block_size) void fill_in_csr( size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ source, IndexType *__restrict__ row_ptrs, - IndexType *__restrict__ col_idxs, ValueType *__restrict__ values) + const ValueType* __restrict__ source, IndexType* __restrict__ row_ptrs, + IndexType* __restrict__ col_idxs, ValueType* __restrict__ values) { const auto tidx = thread::get_thread_id_flat(); @@ -225,9 +225,9 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( template __global__ __launch_bounds__(default_block_size) void fill_in_ell( size_type num_rows, size_type num_cols, size_type source_stride, - const ValueType *__restrict__ source, size_type max_nnz_per_row, - size_type result_stride, IndexType *__restrict__ col_ptrs, - ValueType *__restrict__ values) + const ValueType* __restrict__ source, size_type max_nnz_per_row, + size_type result_stride, IndexType* __restrict__ col_ptrs, + ValueType* __restrict__ values) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -255,8 +255,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_ell( __global__ __launch_bounds__(config::warp_size) void calculate_slice_lengths( size_type num_rows, size_type slice_size, int slice_num, - size_type stride_factor, const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ slice_lengths, size_type *__restrict__ slice_sets) + size_type stride_factor, const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ slice_lengths, size_type* __restrict__ slice_sets) { constexpr auto warp_size = config::warp_size; const auto sliceid = blockIdx.x; @@ -275,7 +275,7 @@ __global__ 
__launch_bounds__(config::warp_size) void calculate_slice_lengths( group::tiled_partition(group::this_thread_block()); auto warp_result = reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0) { auto slice_length = @@ -290,9 +290,9 @@ __global__ __launch_bounds__(config::warp_size) void calculate_slice_lengths( template __global__ __launch_bounds__(default_block_size) void fill_in_sellp( size_type num_rows, size_type num_cols, size_type slice_size, - size_type stride, const ValueType *__restrict__ source, - size_type *__restrict__ slice_lengths, size_type *__restrict__ slice_sets, - IndexType *__restrict__ col_idxs, ValueType *__restrict__ vals) + size_type stride, const ValueType* __restrict__ source, + size_type* __restrict__ slice_lengths, size_type* __restrict__ slice_sets, + IndexType* __restrict__ col_idxs, ValueType* __restrict__ vals) { const auto global_row = thread::get_thread_id_flat(); const auto row = global_row % slice_size; @@ -321,14 +321,14 @@ __global__ __launch_bounds__(default_block_size) void fill_in_sellp( __global__ __launch_bounds__(default_block_size) void reduce_max_nnz( - size_type size, const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ result) + size_type size, const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ result) { extern __shared__ size_type block_max[]; reduce_array( size, nnz_per_row, block_max, - [](const size_type &x, const size_type &y) { return max(x, y); }); + [](const size_type& x, const size_type& y) { return max(x, y); }); if (threadIdx.x == 0) { result[blockIdx.x] = block_max[0]; @@ -338,7 +338,7 @@ __global__ __launch_bounds__(default_block_size) void reduce_max_nnz( __global__ __launch_bounds__(default_block_size) void reduce_max_nnz_per_slice( size_type num_rows, size_type slice_size, size_type stride_factor, - const size_type *__restrict__ 
nnz_per_row, size_type *__restrict__ result) + const size_type* __restrict__ nnz_per_row, size_type* __restrict__ result) { constexpr auto warp_size = config::warp_size; auto warp_tile = @@ -357,7 +357,7 @@ __global__ __launch_bounds__(default_block_size) void reduce_max_nnz_per_slice( auto warp_result = reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0 && warpid < slice_num) { result[warpid] = ceildiv(warp_result, stride_factor) * stride_factor; @@ -366,13 +366,13 @@ __global__ __launch_bounds__(default_block_size) void reduce_max_nnz_per_slice( __global__ __launch_bounds__(default_block_size) void reduce_total_cols( - size_type num_slices, const size_type *__restrict__ max_nnz_per_slice, - size_type *__restrict__ result) + size_type num_slices, const size_type* __restrict__ max_nnz_per_slice, + size_type* __restrict__ result) { extern __shared__ size_type block_result[]; reduce_array(num_slices, max_nnz_per_slice, block_result, - [](const size_type &x, const size_type &y) { return x + y; }); + [](const size_type& x, const size_type& y) { return x + y; }); if (threadIdx.x == 0) { result[blockIdx.x] = block_result[0]; diff --git a/common/cuda_hip/matrix/diagonal_kernels.hpp.inc b/common/cuda_hip/matrix/diagonal_kernels.hpp.inc index 9a3736766e6..923ad5c0563 100644 --- a/common/cuda_hip/matrix/diagonal_kernels.hpp.inc +++ b/common/cuda_hip/matrix/diagonal_kernels.hpp.inc @@ -35,9 +35,9 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void apply_to_csr( - size_type num_rows, const ValueType *__restrict__ diag, - const IndexType *__restrict__ row_ptrs, - ValueType *__restrict__ result_values) + size_type num_rows, const ValueType* __restrict__ diag, + const IndexType* __restrict__ row_ptrs, + ValueType* __restrict__ result_values) { constexpr auto warp_size = config::warp_size; auto warp_tile = 
diff --git a/common/cuda_hip/matrix/ell_kernels.hpp.inc b/common/cuda_hip/matrix/ell_kernels.hpp.inc index f1845111eec..ca99cce1153 100644 --- a/common/cuda_hip/matrix/ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/ell_kernels.hpp.inc @@ -39,9 +39,9 @@ template __device__ void spmv_kernel( const size_type num_rows, const int num_worker_per_row, - acc::range val, const IndexType *__restrict__ col, + acc::range val, const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, - acc::range b, OutputValueType *__restrict__ c, + acc::range b, OutputValueType* __restrict__ c, const size_type c_stride, Closure op) { const auto tidx = thread::get_thread_id_flat(); @@ -107,15 +107,15 @@ template __global__ __launch_bounds__(default_block_size) void spmv( const size_type num_rows, const int num_worker_per_row, - acc::range val, const IndexType *__restrict__ col, + acc::range val, const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, - acc::range b, OutputValueType *__restrict__ c, + acc::range b, OutputValueType* __restrict__ c, const size_type c_stride) { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [](const OutputValueType &x, const OutputValueType &y) { return x; }); + [](const OutputValueType& x, const OutputValueType& y) { return x; }); } @@ -124,9 +124,9 @@ template alpha, acc::range val, - const IndexType *__restrict__ col, const size_type stride, + const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, - const OutputValueType *__restrict__ beta, OutputValueType *__restrict__ c, + const OutputValueType* __restrict__ beta, OutputValueType* __restrict__ c, const size_type c_stride) { const OutputValueType alpha_val = alpha(0); @@ -140,15 +140,15 @@ __global__ __launch_bounds__(default_block_size) void spmv( spmv_kernel( num_rows, num_worker_per_row, 
val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val](const OutputValueType &x, const OutputValueType &y) { + [&alpha_val](const OutputValueType& x, const OutputValueType& y) { return alpha_val * x; }); } else { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val, &beta_val](const OutputValueType &x, - const OutputValueType &y) { + [&alpha_val, &beta_val](const OutputValueType& x, + const OutputValueType& y) { return alpha_val * x + beta_val * y; }); } @@ -161,7 +161,7 @@ __global__ __launch_bounds__(default_block_size) void spmv( template __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ result) + ValueType* __restrict__ result) { const auto tidx_x = threadIdx.x + blockDim.x * blockIdx.x; const auto tidx_y = threadIdx.y + blockDim.y * blockIdx.y; @@ -174,9 +174,9 @@ __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( template __global__ __launch_bounds__(default_block_size) void fill_in_dense( size_type num_rows, size_type nnz, size_type source_stride, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, size_type result_stride, - ValueType *__restrict__ result) + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, size_type result_stride, + ValueType* __restrict__ result) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_rows) { @@ -192,7 +192,7 @@ __global__ __launch_bounds__(default_block_size) void fill_in_dense( template __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *__restrict__ values, IndexType *__restrict__ result) + const ValueType* __restrict__ values, IndexType* __restrict__ result) { constexpr auto warp_size = config::warp_size; const auto 
row_idx = thread::get_subwarp_id_flat(); @@ -209,7 +209,7 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( } result[row_idx] = reduce( warp_tile, part_result, - [](const size_type &a, const size_type &b) { return a + b; }); + [](const size_type& a, const size_type& b) { return a + b; }); } } @@ -217,11 +217,11 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( template __global__ __launch_bounds__(default_block_size) void fill_in_csr( size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_col_idxs, - IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values) + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_col_idxs, + IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) { const auto tidx = thread::get_thread_id_flat(); @@ -242,8 +242,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( template __global__ __launch_bounds__(default_block_size) void extract_diagonal( size_type diag_size, size_type max_nnz_per_row, size_type orig_stride, - const ValueType *__restrict__ orig_values, - const IndexType *__restrict__ orig_col_idxs, ValueType *__restrict__ diag) + const ValueType* __restrict__ orig_values, + const IndexType* __restrict__ orig_col_idxs, ValueType* __restrict__ diag) { const auto tidx = thread::get_thread_id_flat(); const auto row = tidx % diag_size; diff --git a/common/cuda_hip/matrix/hybrid_kernels.hpp.inc b/common/cuda_hip/matrix/hybrid_kernels.hpp.inc index c7c192189e0..a2c2cd0d158 100644 --- a/common/cuda_hip/matrix/hybrid_kernels.hpp.inc +++ b/common/cuda_hip/matrix/hybrid_kernels.hpp.inc @@ -48,8 +48,8 @@ template __global__ __launch_bounds__(default_block_size) void count_coo_row_nnz( const size_type nnz, const 
size_type num_lines, - const ValueType *__restrict__ val, const IndexType *__restrict__ row, - IndexType *__restrict__ nnz_per_row) + const ValueType* __restrict__ val, const IndexType* __restrict__ row, + IndexType* __restrict__ nnz_per_row) { IndexType temp_val = 0; const auto start = static_cast(blockDim.x) * blockIdx.x * @@ -95,14 +95,14 @@ __global__ __launch_bounds__(default_block_size) void count_coo_row_nnz( template __global__ __launch_bounds__(default_block_size) void fill_in_csr( size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *__restrict__ ell_val, - const IndexType *__restrict__ ell_col, - const ValueType *__restrict__ coo_val, - const IndexType *__restrict__ coo_col, - const IndexType *__restrict__ coo_offset, - IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values) + const ValueType* __restrict__ ell_val, + const IndexType* __restrict__ ell_col, + const ValueType* __restrict__ coo_val, + const IndexType* __restrict__ coo_col, + const IndexType* __restrict__ coo_offset, + IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) { const auto tidx = thread::get_thread_id_flat(); @@ -129,8 +129,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( template __global__ __launch_bounds__(default_block_size) void add( - size_type num, ValueType1 *__restrict__ val1, - const ValueType2 *__restrict__ val2) + size_type num, ValueType1* __restrict__ val1, + const ValueType2* __restrict__ val2) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num) { diff --git a/common/cuda_hip/matrix/sellp_kernels.hpp.inc b/common/cuda_hip/matrix/sellp_kernels.hpp.inc index b0e7ab9dc93..ccff472492b 100644 --- a/common/cuda_hip/matrix/sellp_kernels.hpp.inc +++ b/common/cuda_hip/matrix/sellp_kernels.hpp.inc @@ -36,10 +36,10 @@ namespace { template __global__ 
__launch_bounds__(matrix::default_slice_size) void spmv_kernel( size_type num_rows, size_type num_right_hand_sides, size_type b_stride, - size_type c_stride, const size_type *__restrict__ slice_lengths, - const size_type *__restrict__ slice_sets, const ValueType *__restrict__ a, - const IndexType *__restrict__ col, const ValueType *__restrict__ b, - ValueType *__restrict__ c) + size_type c_stride, const size_type* __restrict__ slice_lengths, + const size_type* __restrict__ slice_sets, const ValueType* __restrict__ a, + const IndexType* __restrict__ col, const ValueType* __restrict__ b, + ValueType* __restrict__ c) { const auto slice_id = blockIdx.x; const auto slice_size = blockDim.x; @@ -63,11 +63,11 @@ template __global__ __launch_bounds__(matrix::default_slice_size) void advanced_spmv_kernel( size_type num_rows, size_type num_right_hand_sides, size_type b_stride, - size_type c_stride, const size_type *__restrict__ slice_lengths, - const size_type *__restrict__ slice_sets, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ a, - const IndexType *__restrict__ col, const ValueType *__restrict__ b, - const ValueType *__restrict__ beta, ValueType *__restrict__ c) + size_type c_stride, const size_type* __restrict__ slice_lengths, + const size_type* __restrict__ slice_sets, + const ValueType* __restrict__ alpha, const ValueType* __restrict__ a, + const IndexType* __restrict__ col, const ValueType* __restrict__ b, + const ValueType* __restrict__ beta, ValueType* __restrict__ c) { const auto slice_id = blockIdx.x; const auto slice_size = blockDim.x; @@ -97,7 +97,7 @@ namespace kernel { template __global__ __launch_bounds__(config::max_block_size) void initialize_zero_dense( size_type num_rows, size_type num_cols, size_type stride, - ValueType *__restrict__ result) + ValueType* __restrict__ result) { const auto tidx_x = threadIdx.x + blockDim.x * blockIdx.x; const auto tidx_y = threadIdx.y + blockDim.y * blockIdx.y; @@ -110,10 +110,10 @@ __global__ 
__launch_bounds__(config::max_block_size) void initialize_zero_dense( template __global__ __launch_bounds__(default_block_size) void fill_in_dense( size_type num_rows, size_type num_cols, size_type stride, - size_type slice_size, const size_type *__restrict__ slice_lengths, - const size_type *__restrict__ slice_sets, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, ValueType *__restrict__ result) + size_type slice_size, const size_type* __restrict__ slice_lengths, + const size_type* __restrict__ slice_sets, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, ValueType* __restrict__ result) { const auto global_row = thread::get_subwarp_id_flat(); const auto row = global_row % slice_size; @@ -137,8 +137,8 @@ __global__ __launch_bounds__(default_block_size) void fill_in_dense( template __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( size_type num_rows, size_type slice_size, - const size_type *__restrict__ slice_sets, - const ValueType *__restrict__ values, IndexType *__restrict__ result) + const size_type* __restrict__ slice_sets, + const ValueType* __restrict__ values, IndexType* __restrict__ result) { constexpr auto warp_size = config::warp_size; auto warp_tile = @@ -161,7 +161,7 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( } result[row_idx] = reduce( warp_tile, part_result, - [](const size_type &a, const size_type &b) { return a + b; }); + [](const size_type& a, const size_type& b) { return a + b; }); } } @@ -169,12 +169,12 @@ __global__ __launch_bounds__(default_block_size) void count_nnz_per_row( template __global__ __launch_bounds__(default_block_size) void fill_in_csr( size_type num_rows, size_type slice_size, - const size_type *__restrict__ source_slice_sets, - const IndexType *__restrict__ source_col_idxs, - const ValueType *__restrict__ source_values, - IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - 
ValueType *__restrict__ result_values) + const size_type* __restrict__ source_slice_sets, + const IndexType* __restrict__ source_col_idxs, + const ValueType* __restrict__ source_values, + IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values) { const auto row = thread::get_thread_id_flat(); const auto slice_id = row / slice_size; @@ -199,9 +199,9 @@ __global__ __launch_bounds__(default_block_size) void fill_in_csr( template __global__ __launch_bounds__(default_block_size) void extract_diagonal( size_type diag_size, size_type slice_size, - const size_type *__restrict__ orig_slice_sets, - const ValueType *__restrict__ orig_values, - const IndexType *__restrict__ orig_col_idxs, ValueType *__restrict__ diag) + const size_type* __restrict__ orig_slice_sets, + const ValueType* __restrict__ orig_values, + const IndexType* __restrict__ orig_col_idxs, ValueType* __restrict__ diag) { constexpr auto warp_size = config::warp_size; auto warp_tile = diff --git a/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc index f03fc4a59b3..d4fde6338bd 100644 --- a/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc +++ b/common/cuda_hip/multigrid/amgx_pgm_kernels.hpp.inc @@ -35,8 +35,8 @@ namespace kernel { template __global__ __launch_bounds__(default_block_size) void match_edge_kernel( - size_type num, const IndexType *__restrict__ strongest_neighbor_vals, - IndexType *__restrict__ agg_vals) + size_type num, const IndexType* __restrict__ strongest_neighbor_vals, + IndexType* __restrict__ agg_vals) { auto tidx = thread::get_thread_id_flat(); if (tidx >= num) { @@ -57,8 +57,8 @@ __global__ __launch_bounds__(default_block_size) void match_edge_kernel( template __global__ __launch_bounds__(default_block_size) void activate_kernel( - size_type num, const IndexType *__restrict__ agg, - IndexType *__restrict__ active_agg) + size_type num, const IndexType* __restrict__ agg, 
+ IndexType* __restrict__ active_agg) { auto tidx = thread::get_thread_id_flat(); if (tidx >= num) { @@ -70,8 +70,8 @@ __global__ __launch_bounds__(default_block_size) void activate_kernel( template __global__ __launch_bounds__(default_block_size) void fill_agg_kernel( - size_type num, const IndexType *__restrict__ index, - IndexType *__restrict__ result) + size_type num, const IndexType* __restrict__ index, + IndexType* __restrict__ result) { auto tidx = thread::get_thread_id_flat(); if (tidx >= num) { @@ -85,8 +85,8 @@ __global__ __launch_bounds__(default_block_size) void fill_agg_kernel( template __global__ __launch_bounds__(default_block_size) void renumber_kernel( - size_type num, const IndexType *__restrict__ map, - IndexType *__restrict__ result) + size_type num, const IndexType* __restrict__ map, + IndexType* __restrict__ result) { auto tidx = thread::get_thread_id_flat(); if (tidx >= num) { @@ -99,11 +99,11 @@ __global__ __launch_bounds__(default_block_size) void renumber_kernel( template __global__ __launch_bounds__(default_block_size) void find_strongest_neighbor_kernel( - const size_type num, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ weight_vals, - const ValueType *__restrict__ diag, IndexType *__restrict__ agg, - IndexType *__restrict__ strongest_neighbor) + const size_type num, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ weight_vals, + const ValueType* __restrict__ diag, IndexType* __restrict__ agg, + IndexType* __restrict__ strongest_neighbor) { auto row = thread::get_thread_id_flat(); if (row >= num) { @@ -155,12 +155,12 @@ __global__ template __global__ __launch_bounds__(default_block_size) void assign_to_exist_agg_kernel( - const size_type num, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ weight_vals, - const ValueType *__restrict__ diag, - 
const IndexType *__restrict__ agg_const_val, - IndexType *__restrict__ agg_val) + const size_type num, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ weight_vals, + const ValueType* __restrict__ diag, + const IndexType* __restrict__ agg_const_val, + IndexType* __restrict__ agg_val) { auto row = thread::get_thread_id_flat(); if (row >= num || agg_val[row] != -1) { @@ -193,10 +193,10 @@ __global__ template __global__ __launch_bounds__(default_block_size) void assign_to_exist_agg_kernel( - const size_type num, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ weight_vals, - const ValueType *__restrict__ diag, IndexType *__restrict__ agg_val) + const size_type num, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ weight_vals, + const ValueType* __restrict__ diag, IndexType* __restrict__ agg_val) { auto row = thread::get_thread_id_flat(); if (row >= num || agg_val[row] != -1) { diff --git a/common/cuda_hip/preconditioner/isai_kernels.hpp.inc b/common/cuda_hip/preconditioner/isai_kernels.hpp.inc index 27fc0c35b32..1796169cd56 100644 --- a/common/cuda_hip/preconditioner/isai_kernels.hpp.inc +++ b/common/cuda_hip/preconditioner/isai_kernels.hpp.inc @@ -44,13 +44,13 @@ namespace kernel { template __forceinline__ __device__ void generic_generate( - IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, - const IndexType *__restrict__ m_col_idxs, - const ValueType *__restrict__ m_values, - const IndexType *__restrict__ i_row_ptrs, - const IndexType *__restrict__ i_col_idxs, ValueType *__restrict__ i_values, - IndexType *__restrict__ excess_rhs_sizes, - IndexType *__restrict__ excess_nnz, Callable direct_solve) + IndexType num_rows, const IndexType* __restrict__ m_row_ptrs, + const IndexType* __restrict__ m_col_idxs, + const ValueType* __restrict__ m_values, + const IndexType* 
__restrict__ i_row_ptrs, + const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, + IndexType* __restrict__ excess_rhs_sizes, + IndexType* __restrict__ excess_nnz, Callable direct_solve) { static_assert(subwarp_size >= row_size_limit, "incompatible subwarp_size"); const auto row = thread::get_subwarp_id_flat(); @@ -102,7 +102,7 @@ __forceinline__ __device__ void generic_generate( auto dense_system_ptr = storage + (threadIdx.x / subwarp_size) * subwarp_size * subwarp_size; // row-major accessor - auto dense_system = [&](IndexType row, IndexType col) -> ValueType & { + auto dense_system = [&](IndexType row, IndexType col) -> ValueType& { return dense_system_ptr[row * subwarp_size + col]; }; @@ -164,17 +164,17 @@ __forceinline__ __device__ void generic_generate( template __global__ __launch_bounds__(default_block_size) void generate_l_inverse( - IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, - const IndexType *__restrict__ m_col_idxs, - const ValueType *__restrict__ m_values, - const IndexType *__restrict__ i_row_ptrs, - const IndexType *__restrict__ i_col_idxs, ValueType *__restrict__ i_values, - IndexType *__restrict__ excess_rhs_sizes, - IndexType *__restrict__ excess_nnz) + IndexType num_rows, const IndexType* __restrict__ m_row_ptrs, + const IndexType* __restrict__ m_col_idxs, + const ValueType* __restrict__ m_values, + const IndexType* __restrict__ i_row_ptrs, + const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, + IndexType* __restrict__ excess_rhs_sizes, + IndexType* __restrict__ excess_nnz) { auto trs_solve = - [](IndexType num_elems, const ValueType *__restrict__ local_row, - group::thread_block_tile &subwarp, size_type) { + [](IndexType num_elems, const ValueType* __restrict__ local_row, + group::thread_block_tile& subwarp, size_type) { const int local_id = subwarp.thread_rank(); ValueType rhs = local_id == num_elems - 1 ? 
one() : zero(); @@ -203,17 +203,17 @@ __global__ __launch_bounds__(default_block_size) void generate_l_inverse( template __global__ __launch_bounds__(default_block_size) void generate_u_inverse( - IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, - const IndexType *__restrict__ m_col_idxs, - const ValueType *__restrict__ m_values, - const IndexType *__restrict__ i_row_ptrs, - const IndexType *__restrict__ i_col_idxs, ValueType *__restrict__ i_values, - IndexType *__restrict__ excess_rhs_sizes, - IndexType *__restrict__ excess_nnz) + IndexType num_rows, const IndexType* __restrict__ m_row_ptrs, + const IndexType* __restrict__ m_col_idxs, + const ValueType* __restrict__ m_values, + const IndexType* __restrict__ i_row_ptrs, + const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, + IndexType* __restrict__ excess_rhs_sizes, + IndexType* __restrict__ excess_nnz) { auto trs_solve = [](IndexType num_elems, - const ValueType *__restrict__ local_row, - group::thread_block_tile &subwarp, + const ValueType* __restrict__ local_row, + group::thread_block_tile& subwarp, size_type) { const int local_id = subwarp.thread_rank(); ValueType rhs = local_id == 0 ? 
one() : zero(); @@ -242,17 +242,17 @@ __global__ __launch_bounds__(default_block_size) void generate_u_inverse( template __global__ __launch_bounds__(default_block_size) void generate_general_inverse( - IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, - const IndexType *__restrict__ m_col_idxs, - const ValueType *__restrict__ m_values, - const IndexType *__restrict__ i_row_ptrs, - const IndexType *__restrict__ i_col_idxs, ValueType *__restrict__ i_values, - IndexType *__restrict__ excess_rhs_sizes, - IndexType *__restrict__ excess_nnz, bool spd) + IndexType num_rows, const IndexType* __restrict__ m_row_ptrs, + const IndexType* __restrict__ m_col_idxs, + const ValueType* __restrict__ m_values, + const IndexType* __restrict__ i_row_ptrs, + const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, + IndexType* __restrict__ excess_rhs_sizes, + IndexType* __restrict__ excess_nnz, bool spd) { auto general_solve = [spd](IndexType num_elems, - ValueType *__restrict__ local_row, - group::thread_block_tile &subwarp, + ValueType* __restrict__ local_row, + group::thread_block_tile& subwarp, size_type rhs_one_idx) { const int local_id = subwarp.thread_rank(); ValueType rhs = @@ -291,16 +291,16 @@ __global__ __launch_bounds__(default_block_size) void generate_general_inverse( template __global__ __launch_bounds__(default_block_size) void generate_excess_system( - IndexType num_rows, const IndexType *__restrict__ m_row_ptrs, - const IndexType *__restrict__ m_col_idxs, - const ValueType *__restrict__ m_values, - const IndexType *__restrict__ i_row_ptrs, - const IndexType *__restrict__ i_col_idxs, - const IndexType *__restrict__ excess_rhs_ptrs, - const IndexType *__restrict__ excess_nz_ptrs, - IndexType *__restrict__ excess_row_ptrs, - IndexType *__restrict__ excess_col_idxs, - ValueType *__restrict__ excess_values, ValueType *__restrict__ excess_rhs, + IndexType num_rows, const IndexType* __restrict__ m_row_ptrs, + const IndexType* __restrict__ 
m_col_idxs, + const ValueType* __restrict__ m_values, + const IndexType* __restrict__ i_row_ptrs, + const IndexType* __restrict__ i_col_idxs, + const IndexType* __restrict__ excess_rhs_ptrs, + const IndexType* __restrict__ excess_nz_ptrs, + IndexType* __restrict__ excess_row_ptrs, + IndexType* __restrict__ excess_col_idxs, + ValueType* __restrict__ excess_values, ValueType* __restrict__ excess_rhs, size_type e_start, size_type e_end) { const auto row = @@ -365,8 +365,8 @@ __global__ __launch_bounds__(default_block_size) void generate_excess_system( template __global__ __launch_bounds__(default_block_size) void scale_excess_solution( - const IndexType *__restrict__ excess_block_ptrs, - ValueType *__restrict__ excess_solution, size_type e_start, size_type e_end) + const IndexType* __restrict__ excess_block_ptrs, + ValueType* __restrict__ excess_solution, size_type e_start, size_type e_end) { const auto warp_id = thread::get_subwarp_id_flat(); auto subwarp = @@ -395,10 +395,10 @@ __global__ __launch_bounds__(default_block_size) void scale_excess_solution( template __global__ __launch_bounds__(default_block_size) void copy_excess_solution( - IndexType num_rows, const IndexType *__restrict__ i_row_ptrs, - const IndexType *__restrict__ excess_rhs_ptrs, - const ValueType *__restrict__ excess_solution, - ValueType *__restrict__ i_values, size_type e_start, size_type e_end) + IndexType num_rows, const IndexType* __restrict__ i_row_ptrs, + const IndexType* __restrict__ excess_rhs_ptrs, + const ValueType* __restrict__ excess_solution, + ValueType* __restrict__ i_values, size_type e_start, size_type e_end) { const auto excess_row = thread::get_subwarp_id_flat(); diff --git a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc index abf100be7e5..e574a8b3821 100644 --- a/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc +++ 
b/common/cuda_hip/preconditioner/jacobi_advanced_apply_kernel.hpp.inc @@ -35,14 +35,14 @@ namespace kernel { template -__global__ void __launch_bounds__(warps_per_block *config::warp_size) - advanced_apply(const ValueType *__restrict__ blocks, +__global__ void __launch_bounds__(warps_per_block* config::warp_size) + advanced_apply(const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, - const IndexType *__restrict__ block_ptrs, - size_type num_blocks, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ b, int32 b_stride, - ValueType *__restrict__ x, int32 x_stride) + const IndexType* __restrict__ block_ptrs, + size_type num_blocks, const ValueType* __restrict__ alpha, + const ValueType* __restrict__ b, int32 b_stride, + ValueType* __restrict__ x, int32 x_stride) { const auto block_id = thread::get_subwarp_id(); @@ -63,20 +63,20 @@ __global__ void __launch_bounds__(warps_per_block *config::warp_size) subwarp.thread_rank(), storage_scheme.get_stride(), x + block_ptrs[block_id] * x_stride, x_stride, - [](ValueType &result, const ValueType &out) { result += out; }); + [](ValueType& result, const ValueType& out) { result += out; }); } template __global__ void -__launch_bounds__(warps_per_block *config::warp_size) advanced_adaptive_apply( - const ValueType *__restrict__ blocks, +__launch_bounds__(warps_per_block* config::warp_size) advanced_adaptive_apply( + const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, - const precision_reduction *__restrict__ block_precisions, - const IndexType *__restrict__ block_ptrs, size_type num_blocks, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ b, - int32 b_stride, ValueType *__restrict__ x, int32 x_stride) + const precision_reduction* __restrict__ block_precisions, + const IndexType* __restrict__ block_ptrs, size_type num_blocks, + const ValueType* __restrict__ alpha, const ValueType* __restrict__ b, 
+ int32 b_stride, ValueType* __restrict__ x, int32 x_stride) { const auto block_id = thread::get_subwarp_id(); @@ -96,13 +96,13 @@ __launch_bounds__(warps_per_block *config::warp_size) advanced_adaptive_apply( ValueType, block_precisions[block_id], multiply_vec( subwarp, block_size, v, - reinterpret_cast( + reinterpret_cast( blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id) + subwarp.thread_rank(), storage_scheme.get_stride(), x + block_ptrs[block_id] * x_stride, x_stride, - [](ValueType &result, const ValueType &out) { result += out; })); + [](ValueType& result, const ValueType& out) { result += out; })); } diff --git a/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc index 713dcc3dedc..52e1fce3c03 100644 --- a/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_generate_kernel.hpp.inc @@ -36,8 +36,8 @@ namespace kernel { template __device__ __forceinline__ bool validate_precision_reduction_feasibility( - Group &__restrict__ group, IndexType block_size, - ValueType *__restrict__ row, ValueType *__restrict__ work, size_type stride) + Group& __restrict__ group, IndexType block_size, + ValueType* __restrict__ row, ValueType* __restrict__ work, size_type stride) { using gko::detail::float_traits; // save original data and reduce precision @@ -79,12 +79,12 @@ __device__ __forceinline__ bool validate_precision_reduction_feasibility( template -__global__ void __launch_bounds__(warps_per_block *config::warp_size) generate( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, ValueType *__restrict__ block_data, +__global__ void __launch_bounds__(warps_per_block* config::warp_size) generate( + size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const 
ValueType* __restrict__ values, ValueType* __restrict__ block_data, preconditioner::block_interleaved_storage_scheme storage_scheme, - const IndexType *__restrict__ block_ptrs, size_type num_blocks) + const IndexType* __restrict__ block_ptrs, size_type num_blocks) { const auto block_id = thread::get_subwarp_id(); @@ -114,15 +114,15 @@ __global__ void __launch_bounds__(warps_per_block *config::warp_size) generate( template __global__ void -__launch_bounds__(warps_per_block *config::warp_size) adaptive_generate( - size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, remove_complex accuracy, - ValueType *__restrict__ block_data, +__launch_bounds__(warps_per_block* config::warp_size) adaptive_generate( + size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, remove_complex accuracy, + ValueType* __restrict__ block_data, preconditioner::block_interleaved_storage_scheme storage_scheme, - remove_complex *__restrict__ conditioning, - precision_reduction *__restrict__ block_precisions, - const IndexType *__restrict__ block_ptrs, size_type num_blocks) + remove_complex* __restrict__ conditioning, + precision_reduction* __restrict__ block_precisions, + const IndexType* __restrict__ block_ptrs, size_type num_blocks) { // extract blocks const auto block_id = @@ -197,7 +197,7 @@ __launch_bounds__(warps_per_block *config::warp_size) adaptive_generate( ValueType, prec, copy_matrix( subwarp, block_size, row, 1, perm, trans_perm, - reinterpret_cast( + reinterpret_cast( block_data + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id), storage_scheme.get_stride())); diff --git a/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc index c3fa889b210..2ca63166267 100644 --- 
a/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_kernels.hpp.inc @@ -32,9 +32,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __global__ -__launch_bounds__(warps_per_block *config::warp_size) void duplicate_array( - const precision_reduction *__restrict__ source, size_type source_size, - precision_reduction *__restrict__ dest, size_type dest_size) +__launch_bounds__(warps_per_block* config::warp_size) void duplicate_array( + const precision_reduction* __restrict__ source, size_type source_size, + precision_reduction* __restrict__ dest, size_type dest_size) { auto grid = group::this_grid(); if (grid.thread_rank() >= dest_size) { @@ -48,9 +48,9 @@ __launch_bounds__(warps_per_block *config::warp_size) void duplicate_array( template __global__ void compare_adjacent_rows(size_type num_rows, int32 max_block_size, - const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idx, - bool *__restrict__ matching_next_row) + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idx, + bool* __restrict__ matching_next_row) { const auto warp = group::tiled_partition(group::this_thread_block()); @@ -93,8 +93,8 @@ __global__ void compare_adjacent_rows(size_type num_rows, int32 max_block_size, template __global__ void generate_natural_block_pointer( size_type num_rows, int32 max_block_size, - const bool *__restrict__ matching_next_row, - IndexType *__restrict__ block_ptrs, size_type *__restrict__ num_blocks_arr) + const bool* __restrict__ matching_next_row, + IndexType* __restrict__ block_ptrs, size_type* __restrict__ num_blocks_arr) { block_ptrs[0] = 0; if (num_rows == 0) { @@ -120,7 +120,7 @@ __global__ void generate_natural_block_pointer( template __global__ void agglomerate_supervariables_kernel( int32 max_block_size, size_type num_natural_blocks, - IndexType *__restrict__ block_ptrs, size_type *__restrict__ num_blocks_arr) + IndexType* __restrict__ 
block_ptrs, size_type* __restrict__ num_blocks_arr) { num_blocks_arr[0] = 0; if (num_natural_blocks == 0) { @@ -145,12 +145,12 @@ __global__ void agglomerate_supervariables_kernel( template -__global__ void __launch_bounds__(warps_per_block *config::warp_size) - transpose_jacobi(const ValueType *__restrict__ blocks, +__global__ void __launch_bounds__(warps_per_block* config::warp_size) + transpose_jacobi(const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, - const IndexType *__restrict__ block_ptrs, - size_type num_blocks, ValueType *__restrict__ out_blocks) + const IndexType* __restrict__ block_ptrs, + size_type num_blocks, ValueType* __restrict__ out_blocks) { const auto block_id = thread::get_subwarp_id(); @@ -177,12 +177,12 @@ __global__ void __launch_bounds__(warps_per_block *config::warp_size) template __global__ void -__launch_bounds__(warps_per_block *config::warp_size) adaptive_transpose_jacobi( - const ValueType *__restrict__ blocks, +__launch_bounds__(warps_per_block* config::warp_size) adaptive_transpose_jacobi( + const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, - const precision_reduction *__restrict__ block_precisions, - const IndexType *__restrict__ block_ptrs, size_type num_blocks, - ValueType *__restrict__ out_blocks) + const precision_reduction* __restrict__ block_precisions, + const IndexType* __restrict__ block_ptrs, size_type num_blocks, + ValueType* __restrict__ out_blocks) { const auto block_id = thread::get_subwarp_id(); @@ -199,11 +199,11 @@ __launch_bounds__(warps_per_block *config::warp_size) adaptive_transpose_jacobi( GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, block_precisions[block_id], auto local_block = - reinterpret_cast( + reinterpret_cast( blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id); auto local_out_block = - reinterpret_cast( + reinterpret_cast( out_blocks + 
storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id); for (IndexType i = 0; i < block_size; ++i) { diff --git a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc index db73880732d..23d537b8128 100644 --- a/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc +++ b/common/cuda_hip/preconditioner/jacobi_simple_apply_kernel.hpp.inc @@ -35,11 +35,11 @@ namespace kernel { template -__global__ void __launch_bounds__(warps_per_block *config::warp_size) apply( - const ValueType *__restrict__ blocks, +__global__ void __launch_bounds__(warps_per_block* config::warp_size) apply( + const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, - const IndexType *__restrict__ block_ptrs, size_type num_blocks, - const ValueType *__restrict__ b, int32 b_stride, ValueType *__restrict__ x, + const IndexType* __restrict__ block_ptrs, size_type num_blocks, + const ValueType* __restrict__ b, int32 b_stride, ValueType* __restrict__ x, int32 x_stride) { const auto block_id = @@ -60,20 +60,20 @@ __global__ void __launch_bounds__(warps_per_block *config::warp_size) apply( subwarp.thread_rank(), storage_scheme.get_stride(), x + block_ptrs[block_id] * x_stride, x_stride, - [](ValueType &result, const ValueType &out) { result = out; }); + [](ValueType& result, const ValueType& out) { result = out; }); } template -__global__ void __launch_bounds__(warps_per_block *config::warp_size) - adaptive_apply(const ValueType *__restrict__ blocks, +__global__ void __launch_bounds__(warps_per_block* config::warp_size) + adaptive_apply(const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, - const precision_reduction *__restrict__ block_precisions, - const IndexType *__restrict__ block_ptrs, - size_type num_blocks, const ValueType *__restrict__ b, - int32 b_stride, ValueType *__restrict__ 
x, int32 x_stride) + const precision_reduction* __restrict__ block_precisions, + const IndexType* __restrict__ block_ptrs, + size_type num_blocks, const ValueType* __restrict__ b, + int32 b_stride, ValueType* __restrict__ x, int32 x_stride) { const auto block_id = thread::get_subwarp_id(); @@ -91,13 +91,13 @@ __global__ void __launch_bounds__(warps_per_block *config::warp_size) ValueType, block_precisions[block_id], multiply_vec( subwarp, block_size, v, - reinterpret_cast( + reinterpret_cast( blocks + storage_scheme.get_group_offset(block_id)) + storage_scheme.get_block_offset(block_id) + subwarp.thread_rank(), storage_scheme.get_stride(), x + block_ptrs[block_id] * x_stride, x_stride, - [](ValueType &result, const ValueType &out) { result = out; })); + [](ValueType& result, const ValueType& out) { result = out; })); } diff --git a/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc b/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc index 41110c05780..51f43494ade 100644 --- a/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc +++ b/common/cuda_hip/solver/cb_gmres_kernels.hpp.inc @@ -35,7 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __global__ __launch_bounds__(default_block_size) void zero_matrix_kernel( - size_type m, size_type n, size_type stride, ValueType *__restrict__ array) + size_type m, size_type n, size_type stride, ValueType* __restrict__ array) { const auto tidx = thread::get_thread_id_flat(); if (tidx < n) { @@ -52,7 +52,7 @@ __global__ __launch_bounds__(default_block_size) void zero_matrix_kernel( template __global__ __launch_bounds__(block_size) void initialize_2_1_kernel( size_type num_rows, size_type num_rhs, size_type krylov_dim, - Accessor3d krylov_bases, ValueType *__restrict__ residual_norm_collection, + Accessor3d krylov_bases, ValueType* __restrict__ residual_norm_collection, size_type stride_residual_nc) { const auto global_id = thread::get_thread_id_flat(); @@ -84,11 +84,11 @@ __global__ __launch_bounds__(block_size) void initialize_2_1_kernel( template __global__ __launch_bounds__(block_size) void initialize_2_2_kernel( size_type num_rows, size_type num_rhs, - const ValueType *__restrict__ residual, size_type stride_residual, - const remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ residual_norm_collection, Accessor3d krylov_bases, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, - size_type *__restrict__ final_iter_nums) + const ValueType* __restrict__ residual, size_type stride_residual, + const remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ residual_norm_collection, Accessor3d krylov_bases, + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, + size_type* __restrict__ final_iter_nums) { const auto global_id = thread::get_thread_id_flat(); const auto krylov_stride = @@ -113,8 +113,8 @@ __global__ __launch_bounds__(block_size) void initialize_2_2_kernel( __global__ __launch_bounds__(default_block_size) void increase_final_iteration_numbers_kernel( - size_type *__restrict__ final_iter_nums, - const stopping_status *__restrict__ stop_status, size_type total_number) + 
size_type* __restrict__ final_iter_nums, + const stopping_status* __restrict__ stop_status, size_type total_number) { const auto global_id = thread::get_thread_id_flat(); if (global_id < total_number) { @@ -126,9 +126,9 @@ __global__ template __global__ __launch_bounds__(default_dot_size) void multinorm2_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, - size_type stride_next_krylov, remove_complex *__restrict__ norms, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ next_krylov_basis, + size_type stride_next_krylov, remove_complex* __restrict__ norms, + const stopping_status* __restrict__ stop_status) { using rc_vtype = remove_complex; const auto tidx = threadIdx.x; @@ -141,9 +141,9 @@ __global__ __launch_bounds__(default_dot_size) void multinorm2_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ - UninitializedArray + UninitializedArray reduction_helper_array; - rc_vtype *__restrict__ reduction_helper = reduction_helper_array; + rc_vtype* __restrict__ reduction_helper = reduction_helper_array; rc_vtype local_res = zero(); if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { for (size_type i = start_row + tidy; i < end_row; @@ -159,7 +159,7 @@ __global__ __launch_bounds__(default_dot_size) void multinorm2_kernel( group::tiled_partition(group::this_thread_block()); const auto sum = reduce(tile_block, local_res, - [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + [](const rc_vtype& a, const rc_vtype& b) { return a + b; }); const auto new_col_idx = blockIdx.x * default_dot_dim + tidy; if (tidx == 0 && new_col_idx < num_cols && !stop_status[new_col_idx].has_stopped()) { @@ -173,9 +173,9 @@ template __global__ __launch_bounds__(default_dot_size) void multinorminf_without_stop_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ 
next_krylov_basis, + const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, - remove_complex *__restrict__ norms, size_type stride_norms) + remove_complex* __restrict__ norms, size_type stride_norms) { using rc_vtype = remove_complex; const auto tidx = threadIdx.x; @@ -188,9 +188,9 @@ __global__ // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ - UninitializedArray + UninitializedArray reduction_helper_array; - rc_vtype *__restrict__ reduction_helper = reduction_helper_array; + rc_vtype* __restrict__ reduction_helper = reduction_helper_array; rc_vtype local_max = zero(); if (col_idx < num_cols) { for (size_type i = start_row + tidy; i < end_row; @@ -207,7 +207,7 @@ __global__ const auto tile_block = group::tiled_partition(group::this_thread_block()); const auto value = - reduce(tile_block, local_max, [](const rc_vtype &a, const rc_vtype &b) { + reduce(tile_block, local_max, [](const rc_vtype& a, const rc_vtype& b) { return ((a >= b) ? 
a : b); }); const auto new_col_idx = blockIdx.x * default_dot_dim + tidy; @@ -222,11 +222,11 @@ __global__ template __global__ __launch_bounds__(default_dot_size) void multinorm2_inf_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, + const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, - remove_complex *__restrict__ norms1, - remove_complex *__restrict__ norms2, - const stopping_status *__restrict__ stop_status) + remove_complex* __restrict__ norms1, + remove_complex* __restrict__ norms2, + const stopping_status* __restrict__ stop_status) { using rc_vtype = remove_complex; const auto tidx = threadIdx.x; @@ -239,11 +239,11 @@ __global__ __launch_bounds__(default_dot_size) void multinorm2_inf_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ UninitializedArray< - rc_vtype, (1 + compute_inf) * default_dot_dim *(default_dot_dim + 1)> + rc_vtype, (1 + compute_inf) * default_dot_dim*(default_dot_dim + 1)> reduction_helper_array; - rc_vtype *__restrict__ reduction_helper_add = reduction_helper_array; - rc_vtype *__restrict__ reduction_helper_max = - static_cast(reduction_helper_array) + + rc_vtype* __restrict__ reduction_helper_add = reduction_helper_array; + rc_vtype* __restrict__ reduction_helper_max = + static_cast(reduction_helper_array) + default_dot_dim * (default_dot_dim + 1); rc_vtype local_res = zero(); rc_vtype local_max = zero(); @@ -269,12 +269,12 @@ __global__ __launch_bounds__(default_dot_size) void multinorm2_inf_kernel( group::tiled_partition(group::this_thread_block()); const auto sum = reduce(tile_block, local_res, - [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + [](const rc_vtype& a, const rc_vtype& b) { return a + b; }); rc_vtype reduced_max{}; if (compute_inf) { local_max = reduction_helper_max[tidy * (default_dot_dim + 1) + tidx]; reduced_max = 
reduce(tile_block, local_max, - [](const rc_vtype &a, const rc_vtype &b) { + [](const rc_vtype& a, const rc_vtype& b) { return ((a >= b) ? a : b); }); } @@ -291,12 +291,12 @@ __global__ __launch_bounds__(default_dot_size) void multinorm2_inf_kernel( template -__global__ __launch_bounds__(dot_dim *dot_dim) void multidot_kernel( +__global__ __launch_bounds__(dot_dim* dot_dim) void multidot_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, + const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status) { /* * In general in this kernel: @@ -320,7 +320,7 @@ __global__ __launch_bounds__(dot_dim *dot_dim) void multidot_kernel( // template error when using `reduction_helper_array` directly in `reduce` __shared__ UninitializedArray reduction_helper_array; - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { @@ -340,7 +340,7 @@ __global__ __launch_bounds__(dot_dim *dot_dim) void multidot_kernel( const auto tile_block = group::tiled_partition(thread_block); const auto sum = reduce(tile_block, local_res, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0 && new_col_idx < num_cols && !stop_status[new_col_idx].has_stopped()) { const auto hessenberg_idx = k * stride_hessenberg + new_col_idx; @@ -351,10 +351,10 @@ __global__ __launch_bounds__(dot_dim *dot_dim) void multidot_kernel( template __global__ __launch_bounds__(block_size) void singledot_kernel( - size_type num_rows, 
const ValueType *__restrict__ next_krylov_basis, + size_type num_rows, const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status) { /* * In general in this kernel: @@ -374,7 +374,7 @@ __global__ __launch_bounds__(block_size) void singledot_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ UninitializedArray reduction_helper_array; - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (!stop_status[col_idx].has_stopped()) { @@ -390,7 +390,7 @@ __global__ __launch_bounds__(block_size) void singledot_kernel( auto thread_block = group::this_thread_block(); thread_block.sync(); reduce(thread_block, reduction_helper, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0 && !stop_status[col_idx].has_stopped()) { const auto hessenberg_idx = k * stride_hessenberg + col_idx; atomic_add(hessenberg_iter + hessenberg_idx, reduction_helper[0]); @@ -403,10 +403,10 @@ __global__ __launch_bounds__(block_size) void singledot_kernel( template __global__ __launch_bounds__(block_size) void update_next_krylov_kernel( size_type num_iters, size_type num_rows, size_type num_cols, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, - const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + 
const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row_idx = global_id / stride_next_krylov; @@ -432,11 +432,11 @@ __global__ __launch_bounds__(block_size) void update_next_krylov_kernel( template __global__ __launch_bounds__(block_size) void update_next_krylov_and_add_kernel( size_type num_iters, size_type num_rows, size_type num_cols, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, ValueType *__restrict__ hessenberg_iter, - size_type stride_hessenberg, const ValueType *__restrict__ buffer_iter, - size_type stride_buffer, const stopping_status *__restrict__ stop_status, - const stopping_status *__restrict__ reorth_status) + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType* __restrict__ hessenberg_iter, + size_type stride_hessenberg, const ValueType* __restrict__ buffer_iter, + size_type stride_buffer, const stopping_status* __restrict__ stop_status, + const stopping_status* __restrict__ reorth_status) { const auto global_id = thread::get_thread_id_flat(); const auto row_idx = global_id / stride_next_krylov; @@ -464,12 +464,12 @@ __global__ __launch_bounds__(block_size) void update_next_krylov_and_add_kernel( // Must be called with at least `num_rhs` threads template __global__ __launch_bounds__(block_size) void check_arnoldi_norms( - size_type num_rhs, remove_complex *__restrict__ arnoldi_norm, - size_type stride_norm, ValueType *__restrict__ hessenberg_iter, + size_type num_rhs, remove_complex* __restrict__ arnoldi_norm, + size_type stride_norm, ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, size_type iter, Accessor3d krylov_bases, - const stopping_status *__restrict__ stop_status, - stopping_status *__restrict__ reorth_status, - size_type *__restrict__ num_reorth) + const 
stopping_status* __restrict__ stop_status, + stopping_status* __restrict__ reorth_status, + size_type* __restrict__ num_reorth) { const remove_complex eta_squared = 1.0 / 2.0; const auto col_idx = thread::get_thread_id_flat(); @@ -499,8 +499,8 @@ __global__ __launch_bounds__(block_size) void check_arnoldi_norms( template __global__ __launch_bounds__(block_size) void set_scalar_kernel( size_type num_rhs, size_type num_blocks, - const RealValueType *__restrict__ residual_norm, size_type stride_residual, - const RealValueType *__restrict__ arnoldi_inf, size_type stride_inf, + const RealValueType* __restrict__ residual_norm, size_type stride_residual, + const RealValueType* __restrict__ arnoldi_inf, size_type stride_inf, Accessor3d krylov_bases) { static_assert(!is_complex_s::value, @@ -532,10 +532,10 @@ __global__ __launch_bounds__(block_size) void set_scalar_kernel( template __global__ __launch_bounds__(block_size) void update_krylov_next_krylov_kernel( size_type iter, size_type num_rows, size_type num_cols, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, - Accessor3d krylov_bases, const ValueType *__restrict__ hessenberg_iter, + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, + Accessor3d krylov_bases, const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row_idx = global_id / stride_next_krylov; @@ -561,10 +561,10 @@ __global__ __launch_bounds__(block_size) void update_krylov_next_krylov_kernel( template __global__ __launch_bounds__(block_size) void calculate_Qy_kernel( size_type num_rows, size_type num_cols, const Accessor3d krylov_bases, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ before_preconditioner, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ 
before_preconditioner, size_type stride_preconditioner, - const size_type *__restrict__ final_iter_nums) + const size_type* __restrict__ final_iter_nums) { const auto global_id = thread::get_thread_id_flat(); const auto row_id = global_id / stride_preconditioner; diff --git a/common/cuda_hip/solver/common_gmres_kernels.hpp.inc b/common/cuda_hip/solver/common_gmres_kernels.hpp.inc index 84ee6f52f03..4e91879f22c 100644 --- a/common/cuda_hip/solver/common_gmres_kernels.hpp.inc +++ b/common/cuda_hip/solver/common_gmres_kernels.hpp.inc @@ -35,11 +35,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __global__ __launch_bounds__(block_size) void initialize_1_kernel( size_type num_rows, size_type num_cols, size_type krylov_dim, - const ValueType *__restrict__ b, size_type stride_b, - ValueType *__restrict__ residual, size_type stride_residual, - ValueType *__restrict__ givens_sin, size_type stride_sin, - ValueType *__restrict__ givens_cos, size_type stride_cos, - stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ b, size_type stride_b, + ValueType* __restrict__ residual, size_type stride_residual, + ValueType* __restrict__ givens_sin, size_type stride_sin, + ValueType* __restrict__ givens_cos, size_type stride_cos, + stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); @@ -68,9 +68,9 @@ __global__ __launch_bounds__(block_size) void initialize_1_kernel( template __device__ void calculate_sin_and_cos_kernel( size_type col_idx, size_type num_cols, size_type iter, - const ValueType &this_hess, const ValueType &next_hess, - ValueType *givens_sin, size_type stride_sin, ValueType *givens_cos, - size_type stride_cos, ValueType ®ister_sin, ValueType ®ister_cos) + const ValueType& this_hess, const ValueType& next_hess, + ValueType* givens_sin, size_type stride_sin, ValueType* givens_cos, + size_type stride_cos, ValueType& register_sin, ValueType& register_cos) { if (this_hess == 
zero()) { register_cos = zero(); @@ -91,9 +91,9 @@ __device__ void calculate_sin_and_cos_kernel( template __device__ void calculate_residual_norm_kernel( size_type col_idx, size_type num_cols, size_type iter, - const ValueType ®ister_sin, const ValueType ®ister_cos, - remove_complex *residual_norm, - ValueType *residual_norm_collection, + const ValueType& register_sin, const ValueType& register_cos, + remove_complex* residual_norm, + ValueType* residual_norm_collection, size_type stride_residual_norm_collection) { const auto this_rnc = @@ -112,13 +112,13 @@ __device__ void calculate_residual_norm_kernel( template __global__ __launch_bounds__(block_size) void givens_rotation_kernel( size_type num_rows, size_type num_cols, size_type iter, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - ValueType *__restrict__ givens_sin, size_type stride_sin, - ValueType *__restrict__ givens_cos, size_type stride_cos, - remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ residual_norm_collection, + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + ValueType* __restrict__ givens_sin, size_type stride_sin, + ValueType* __restrict__ givens_cos, size_type stride_cos, + remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ residual_norm_collection, size_type stride_residual_norm_collection, - const stopping_status *__restrict__ stop_status) + const stopping_status* __restrict__ stop_status) { const auto col_idx = thread::get_thread_id_flat(); @@ -170,11 +170,11 @@ __global__ __launch_bounds__(block_size) void givens_rotation_kernel( template __global__ __launch_bounds__(block_size) void solve_upper_triangular_kernel( size_type num_cols, size_type num_rhs, - const ValueType *__restrict__ residual_norm_collection, + const ValueType* __restrict__ residual_norm_collection, size_type stride_residual_norm_collection, - const ValueType *__restrict__ hessenberg, size_type stride_hessenberg, - ValueType *__restrict__ 
y, size_type stride_y, - const size_type *__restrict__ final_iter_nums) + const ValueType* __restrict__ hessenberg, size_type stride_hessenberg, + ValueType* __restrict__ y, size_type stride_y, + const size_type* __restrict__ final_iter_nums) { const auto col_idx = thread::get_thread_id_flat(); diff --git a/common/cuda_hip/solver/gmres_kernels.hpp.inc b/common/cuda_hip/solver/gmres_kernels.hpp.inc index 1f44ea93cb8..d5cfa42b890 100644 --- a/common/cuda_hip/solver/gmres_kernels.hpp.inc +++ b/common/cuda_hip/solver/gmres_kernels.hpp.inc @@ -37,11 +37,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __global__ __launch_bounds__(block_size) void initialize_2_2_kernel( size_type num_rows, size_type num_rhs, - const ValueType *__restrict__ residual, size_type stride_residual, - const remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ residual_norm_collection, - ValueType *__restrict__ krylov_bases, size_type stride_krylov, - size_type *__restrict__ final_iter_nums) + const ValueType* __restrict__ residual, size_type stride_residual, + const remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ residual_norm_collection, + ValueType* __restrict__ krylov_bases, size_type stride_krylov, + size_type* __restrict__ final_iter_nums) { const auto global_id = thread::get_thread_id_flat(); const auto row_idx = global_id / num_rhs; @@ -62,8 +62,8 @@ __global__ __launch_bounds__(block_size) void initialize_2_2_kernel( __global__ __launch_bounds__(default_block_size) void increase_final_iteration_numbers_kernel( - size_type *__restrict__ final_iter_nums, - const stopping_status *__restrict__ stop_status, size_type total_number) + size_type* __restrict__ final_iter_nums, + const stopping_status* __restrict__ stop_status, size_type total_number) { const auto global_id = thread::get_thread_id_flat(); if (global_id < total_number) { @@ -75,10 +75,10 @@ __global__ template __global__ __launch_bounds__(default_dot_size) 
void multidot_kernel( size_type k, size_type num_rows, size_type num_cols, - const ValueType *__restrict__ krylov_bases, - const ValueType *__restrict__ next_krylov_basis, size_type stride_krylov, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ krylov_bases, + const ValueType* __restrict__ next_krylov_basis, size_type stride_krylov, + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status) { const auto tidx = threadIdx.x; const auto tidy = threadIdx.y; @@ -90,9 +90,9 @@ __global__ __launch_bounds__(default_dot_size) void multidot_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ - UninitializedArray + UninitializedArray reduction_helper_array; - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { @@ -110,7 +110,7 @@ __global__ __launch_bounds__(default_dot_size) void multidot_kernel( group::tiled_partition(group::this_thread_block()); const auto sum = reduce(tile_block, local_res, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); const auto new_col_idx = blockIdx.x * default_dot_dim + tidy; if (tidx == 0 && new_col_idx < num_cols && !stop_status[new_col_idx].has_stopped()) { @@ -125,10 +125,10 @@ __global__ __launch_bounds__(default_dot_size) void multidot_kernel( template __global__ __launch_bounds__(block_size) void update_next_krylov_kernel( size_type k, size_type num_rows, size_type num_cols, - const ValueType *__restrict__ krylov_bases, - ValueType *__restrict__ next_krylov_basis, size_type stride_krylov, - const ValueType 
*__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ krylov_bases, + ValueType* __restrict__ next_krylov_basis, size_type stride_krylov, + const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row_idx = global_id / stride_krylov; @@ -151,10 +151,10 @@ __global__ __launch_bounds__(block_size) void update_next_krylov_kernel( template __global__ __launch_bounds__(block_size) void update_hessenberg_2_kernel( size_type iter, size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, - size_type stride_next_krylov, ValueType *__restrict__ hessenberg_iter, + const ValueType* __restrict__ next_krylov_basis, + size_type stride_next_krylov, ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + const stopping_status* __restrict__ stop_status) { const auto tidx = threadIdx.x; const auto col_idx = blockIdx.x; @@ -162,7 +162,7 @@ __global__ __launch_bounds__(block_size) void update_hessenberg_2_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ UninitializedArray reduction_helper_array; - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { ValueType local_res{}; @@ -177,7 +177,7 @@ __global__ __launch_bounds__(block_size) void update_hessenberg_2_kernel( // Perform thread block reduction. 
Result is in reduction_helper[0] reduce(group::this_thread_block(), reduction_helper, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0) { hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx] = @@ -192,9 +192,9 @@ __global__ __launch_bounds__(block_size) void update_hessenberg_2_kernel( template __global__ __launch_bounds__(block_size) void update_krylov_kernel( size_type iter, size_type num_rows, size_type num_cols, - ValueType *__restrict__ krylov_bases, size_type stride_krylov, - const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status) + ValueType* __restrict__ krylov_bases, size_type stride_krylov, + const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row_idx = global_id / stride_krylov; @@ -216,11 +216,11 @@ __global__ __launch_bounds__(block_size) void update_krylov_kernel( template __global__ __launch_bounds__(block_size) void calculate_Qy_kernel( size_type num_rows, size_type num_cols, size_type num_rhs, - const ValueType *__restrict__ krylov_bases, size_type stride_krylov, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ before_preconditioner, + const ValueType* __restrict__ krylov_bases, size_type stride_krylov, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ before_preconditioner, size_type stride_preconditioner, - const size_type *__restrict__ final_iter_nums) + const size_type* __restrict__ final_iter_nums) { const auto global_id = thread::get_thread_id_flat(); const auto row_id = global_id / stride_preconditioner; diff --git a/common/cuda_hip/solver/idr_kernels.hpp.inc b/common/cuda_hip/solver/idr_kernels.hpp.inc index 52e9a3313a7..d4b49a0fc7f 100644 --- 
a/common/cuda_hip/solver/idr_kernels.hpp.inc +++ b/common/cuda_hip/solver/idr_kernels.hpp.inc @@ -32,8 +32,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __global__ __launch_bounds__(default_block_size) void initialize_m_kernel( - size_type subspace_dim, size_type nrhs, ValueType *__restrict__ m_values, - size_type m_stride, stopping_status *__restrict__ stop_status) + size_type subspace_dim, size_type nrhs, ValueType* __restrict__ m_values, + size_type m_stride, stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row = global_id / m_stride; @@ -53,16 +53,16 @@ __global__ __launch_bounds__(default_block_size) void initialize_m_kernel( template __global__ __launch_bounds__(block_size) void orthonormalize_subspace_vectors_kernel( - size_type num_rows, size_type num_cols, ValueType *__restrict__ values, + size_type num_rows, size_type num_cols, ValueType* __restrict__ values, size_type stride) { const auto tidx = thread::get_thread_id_flat(); __shared__ UninitializedArray reduction_helper_array; // they are not be used in the same time. 
- ValueType *reduction_helper = reduction_helper_array; + ValueType* reduction_helper = reduction_helper_array; auto reduction_helper_real = - reinterpret_cast *>(reduction_helper); + reinterpret_cast*>(reduction_helper); for (size_type row = 0; row < num_rows; row++) { for (size_type i = 0; i < row; i++) { @@ -76,7 +76,7 @@ __global__ reduction_helper[tidx] = dot; reduce( group::this_thread_block(), reduction_helper, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); __syncthreads(); dot = reduction_helper[0]; @@ -93,8 +93,8 @@ __global__ __syncthreads(); reduction_helper_real[tidx] = norm; reduce(group::this_thread_block(), reduction_helper_real, - [](const remove_complex &a, - const remove_complex &b) { return a + b; }); + [](const remove_complex& a, + const remove_complex& b) { return a + b; }); __syncthreads(); norm = sqrt(reduction_helper_real[0]); @@ -109,10 +109,10 @@ template __global__ __launch_bounds__(default_block_size) void solve_lower_triangular_kernel( size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ m_values, size_type m_stride, - const ValueType *__restrict__ f_values, size_type f_stride, - ValueType *__restrict__ c_values, size_type c_stride, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ m_values, size_type m_stride, + const ValueType* __restrict__ f_values, size_type f_stride, + ValueType* __restrict__ c_values, size_type c_stride, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); @@ -137,11 +137,11 @@ __global__ template __global__ __launch_bounds__(default_block_size) void step_1_kernel( size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ residual_values, size_type residual_stride, - const ValueType *__restrict__ c_values, size_type c_stride, - const ValueType *__restrict__ g_values, size_type 
g_stride, - ValueType *__restrict__ v_values, size_type v_stride, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ residual_values, size_type residual_stride, + const ValueType* __restrict__ c_values, size_type c_stride, + const ValueType* __restrict__ g_values, size_type g_stride, + ValueType* __restrict__ v_values, size_type v_stride, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row = global_id / nrhs; @@ -165,11 +165,11 @@ __global__ __launch_bounds__(default_block_size) void step_1_kernel( template __global__ __launch_bounds__(default_block_size) void step_2_kernel( size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ omega_values, - const ValueType *__restrict__ v_values, size_type v_stride, - const ValueType *__restrict__ c_values, size_type c_stride, - ValueType *__restrict__ u_values, size_type u_stride, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ omega_values, + const ValueType* __restrict__ v_values, size_type v_stride, + const ValueType* __restrict__ c_values, size_type c_stride, + ValueType* __restrict__ u_values, size_type u_stride, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row = global_id / nrhs; @@ -192,10 +192,10 @@ __global__ __launch_bounds__(default_block_size) void step_2_kernel( template __global__ __launch_bounds__(default_dot_size) void multidot_kernel( - size_type num_rows, size_type nrhs, const ValueType *__restrict__ p_i, - const ValueType *__restrict__ g_k, size_type g_k_stride, - ValueType *__restrict__ alpha, - const stopping_status *__restrict__ stop_status) + size_type num_rows, size_type nrhs, const ValueType* __restrict__ p_i, + const ValueType* __restrict__ g_k, size_type g_k_stride, + ValueType* __restrict__ alpha, + const stopping_status* __restrict__ 
stop_status) { const auto tidx = threadIdx.x; const auto tidy = threadIdx.y; @@ -207,9 +207,9 @@ __global__ __launch_bounds__(default_dot_size) void multidot_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` __shared__ - UninitializedArray + UninitializedArray reduction_helper_array; - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (rhs < nrhs && !stop_status[rhs].has_stopped()) { @@ -226,7 +226,7 @@ __global__ __launch_bounds__(default_dot_size) void multidot_kernel( group::tiled_partition(group::this_thread_block()); const auto sum = reduce(tile_block, local_res, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); const auto new_rhs = blockIdx.x * default_dot_dim + tidy; if (tidx == 0 && new_rhs < nrhs && !stop_status[new_rhs].has_stopped()) { atomic_add(alpha + new_rhs, sum); @@ -237,11 +237,11 @@ __global__ __launch_bounds__(default_dot_size) void multidot_kernel( template __global__ __launch_bounds__(block_size) void update_g_k_and_u_kernel( size_type k, size_type i, size_type size, size_type nrhs, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ m_values, - size_type m_stride, const ValueType *__restrict__ g_values, - size_type g_stride, ValueType *__restrict__ g_k_values, - size_type g_k_stride, ValueType *__restrict__ u_values, size_type u_stride, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ alpha, const ValueType* __restrict__ m_values, + size_type m_stride, const ValueType* __restrict__ g_values, + size_type g_stride, ValueType* __restrict__ g_k_values, + size_type g_k_stride, ValueType* __restrict__ u_values, size_type u_stride, + const stopping_status* __restrict__ stop_status) { const auto tidx = 
thread::get_thread_id_flat(); const auto row = tidx / g_k_stride; @@ -264,9 +264,9 @@ __global__ __launch_bounds__(block_size) void update_g_k_and_u_kernel( template __global__ __launch_bounds__(block_size) void update_g_kernel( size_type k, size_type size, size_type nrhs, - const ValueType *__restrict__ g_k_values, size_type g_k_stride, - ValueType *__restrict__ g_values, size_type g_stride, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ g_k_values, size_type g_k_stride, + ValueType* __restrict__ g_values, size_type g_stride, + const stopping_status* __restrict__ stop_status) { const auto tidx = thread::get_thread_id_flat(); const auto row = tidx / g_k_stride; @@ -286,13 +286,13 @@ __global__ __launch_bounds__(block_size) void update_g_kernel( template __global__ __launch_bounds__(default_block_size) void update_x_r_and_f_kernel( size_type k, size_type size, size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ m_values, size_type m_stride, - const ValueType *__restrict__ g_values, size_type g_stride, - const ValueType *__restrict__ u_values, size_type u_stride, - ValueType *__restrict__ f_values, size_type f_stride, - ValueType *__restrict__ r_values, size_type r_stride, - ValueType *__restrict__ x_values, size_type x_stride, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ m_values, size_type m_stride, + const ValueType* __restrict__ g_values, size_type g_stride, + const ValueType* __restrict__ u_values, size_type u_stride, + ValueType* __restrict__ f_values, size_type f_stride, + ValueType* __restrict__ r_values, size_type r_stride, + ValueType* __restrict__ x_values, size_type x_stride, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); const auto row = global_id / x_stride; @@ -321,10 +321,10 @@ __global__ __launch_bounds__(default_block_size) void update_x_r_and_f_kernel( template __global__ 
__launch_bounds__(config::warp_size) void compute_omega_kernel( size_type nrhs, const remove_complex kappa, - const ValueType *__restrict__ tht, - const remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ omega, - const stopping_status *__restrict__ stop_status) + const ValueType* __restrict__ tht, + const remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ omega, + const stopping_status* __restrict__ stop_status) { const auto global_id = thread::get_thread_id_flat(); diff --git a/common/unified/base/kernel_launch.hpp b/common/unified/base/kernel_launch.hpp index 5e5e1914476..6b3a698768c 100644 --- a/common/unified/base/kernel_launch.hpp +++ b/common/unified/base/kernel_launch.hpp @@ -169,14 +169,14 @@ namespace GKO_DEVICE_NAMESPACE { */ template struct matrix_accessor { - ValueType *data; + ValueType* data; size_type stride; /** * @internal * Returns a reference to the element at position (row, col). */ - GKO_INLINE GKO_ATTRIBUTES ValueType &operator()(size_type row, + GKO_INLINE GKO_ATTRIBUTES ValueType& operator()(size_type row, size_type col) { return data[row * stride + col]; @@ -187,7 +187,7 @@ struct matrix_accessor { * Returns a reference to the element at position idx in the underlying * storage. 
*/ - GKO_INLINE GKO_ATTRIBUTES ValueType &operator[](size_type idx) + GKO_INLINE GKO_ATTRIBUTES ValueType& operator[](size_type idx) { return data[idx]; } @@ -219,36 +219,36 @@ struct to_device_type_impl { }; template -struct to_device_type_impl *&> { +struct to_device_type_impl*&> { using type = matrix_accessor>; - static type map_to_device(matrix::Dense *mtx) + static type map_to_device(matrix::Dense* mtx) { return {as_device_type(mtx->get_values()), mtx->get_stride()}; } }; template -struct to_device_type_impl *&> { +struct to_device_type_impl*&> { using type = matrix_accessor>; - static type map_to_device(const matrix::Dense *mtx) + static type map_to_device(const matrix::Dense* mtx) { return {as_device_type(mtx->get_const_values()), mtx->get_stride()}; } }; template -struct to_device_type_impl &> { - using type = device_type *; - static type map_to_device(Array &array) +struct to_device_type_impl&> { + using type = device_type*; + static type map_to_device(Array& array) { return as_device_type(array.get_data()); } }; template -struct to_device_type_impl &> { - using type = const device_type *; - static type map_to_device(const Array &array) +struct to_device_type_impl&> { + using type = const device_type*; + static type map_to_device(const Array& array) { return as_device_type(array.get_const_data()); } @@ -256,7 +256,7 @@ struct to_device_type_impl &> { template -typename to_device_type_impl::type map_to_device(T &¶m) +typename to_device_type_impl::type map_to_device(T&& param) { return to_device_type_impl::map_to_device(param); } diff --git a/common/unified/base/kernel_launch_solver.hpp b/common/unified/base/kernel_launch_solver.hpp index 0f859631919..6c8a1296b83 100644 --- a/common/unified/base/kernel_launch_solver.hpp +++ b/common/unified/base/kernel_launch_solver.hpp @@ -49,7 +49,7 @@ namespace GKO_DEVICE_NAMESPACE { */ template struct default_stride_dense_wrapper { - ValueType *data; + ValueType* data; }; @@ -88,7 +88,7 @@ struct 
device_unpack_solver_impl> { */ template default_stride_dense_wrapper> default_stride( - matrix::Dense *mtx) + matrix::Dense* mtx) { return {as_device_type(mtx->get_values())}; } @@ -99,7 +99,7 @@ default_stride_dense_wrapper> default_stride( */ template default_stride_dense_wrapper> default_stride( - const matrix::Dense *mtx) + const matrix::Dense* mtx) { return {as_device_type(mtx->get_const_values())}; } @@ -112,7 +112,7 @@ default_stride_dense_wrapper> default_stride( * pointer. */ template -device_type *row_vector(matrix::Dense *mtx) +device_type* row_vector(matrix::Dense* mtx) { GKO_ASSERT(mtx->get_size()[0] == 1); return as_device_type(mtx->get_values()); @@ -123,7 +123,7 @@ device_type *row_vector(matrix::Dense *mtx) * @copydoc row_vector(matrix::Dense*) */ template -const device_type *row_vector(const matrix::Dense *mtx) +const device_type* row_vector(const matrix::Dense* mtx) { GKO_ASSERT(mtx->get_size()[0] == 1); return as_device_type(mtx->get_const_values()); diff --git a/common/unified/components/precision_conversion.cpp b/common/unified/components/precision_conversion.cpp index 09ad03125da..27674daba61 100644 --- a/common/unified/components/precision_conversion.cpp +++ b/common/unified/components/precision_conversion.cpp @@ -44,7 +44,7 @@ namespace components { template void convert_precision(std::shared_ptr exec, - size_type size, const SourceType *in, TargetType *out) + size_type size, const SourceType* in, TargetType* out) { run_kernel( exec, diff --git a/common/unified/matrix/coo_kernels.cpp b/common/unified/matrix/coo_kernels.cpp index 095bbdf0e65..b9ccb45aafc 100644 --- a/common/unified/matrix/coo_kernels.cpp +++ b/common/unified/matrix/coo_kernels.cpp @@ -52,8 +52,8 @@ namespace coo { template void extract_diagonal(std::shared_ptr exec, - const matrix::Coo *orig, - matrix::Diagonal *diag) + const matrix::Coo* orig, + matrix::Diagonal* diag) { run_kernel( exec, diff --git a/common/unified/matrix/csr_kernels.cpp 
b/common/unified/matrix/csr_kernels.cpp index 5134e0b88ee..0512a7c8888 100644 --- a/common/unified/matrix/csr_kernels.cpp +++ b/common/unified/matrix/csr_kernels.cpp @@ -55,8 +55,8 @@ namespace csr { template void invert_permutation(std::shared_ptr exec, - size_type size, const IndexType *permutation_indices, - IndexType *inv_permutation) + size_type size, const IndexType* permutation_indices, + IndexType* inv_permutation) { run_kernel( exec, @@ -71,9 +71,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); template void inverse_column_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *column_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* column_permuted) { auto num_rows = orig->get_size()[0]; auto nnz = orig->get_num_stored_elements(); diff --git a/common/unified/matrix/dense_kernels.cpp b/common/unified/matrix/dense_kernels.cpp index 32234b03dfe..a06d8e1eef2 100644 --- a/common/unified/matrix/dense_kernels.cpp +++ b/common/unified/matrix/dense_kernels.cpp @@ -52,8 +52,8 @@ namespace dense { template void copy(std::shared_ptr exec, - const matrix::Dense *input, - matrix::Dense *output) + const matrix::Dense* input, + matrix::Dense* output) { run_kernel( exec, @@ -69,7 +69,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( template void fill(std::shared_ptr exec, - matrix::Dense *mat, ValueType value) + matrix::Dense* mat, ValueType value) { run_kernel( exec, @@ -84,7 +84,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); template void scale(std::shared_ptr exec, - const matrix::Dense *alpha, matrix::Dense *x) + const matrix::Dense* alpha, matrix::Dense* x) { if (alpha->get_size()[1] > 1) { run_kernel( @@ -108,8 +108,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); template void inv_scale(std::shared_ptr exec, - const matrix::Dense *alpha, - matrix::Dense *x) + const matrix::Dense* alpha, + 
matrix::Dense* x) { if (alpha->get_size()[1] > 1) { run_kernel( @@ -134,8 +134,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void add_scaled(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *x, matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Dense* x, matrix::Dense* y) { if (alpha->get_size()[1] > 1) { run_kernel( @@ -160,8 +160,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void sub_scaled(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *x, matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Dense* x, matrix::Dense* y) { if (alpha->get_size()[1] > 1) { run_kernel( @@ -186,9 +186,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void add_scaled_diag(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Diagonal *x, - matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Diagonal* x, + matrix::Dense* y) { const auto diag_values = x->get_const_values(); run_kernel( @@ -204,9 +204,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); template void sub_scaled_diag(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Diagonal *x, - matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Diagonal* x, + matrix::Dense* y) { const auto diag_values = x->get_const_values(); run_kernel( @@ -222,9 +222,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); template void symm_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* permuted) { run_kernel( exec, @@ -240,9 +240,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *permuted) + const Array* permutation_indices, + 
const matrix::Dense* orig, + matrix::Dense* permuted) { run_kernel( exec, @@ -258,9 +258,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_gather(std::shared_ptr exec, - const Array *row_indices, - const matrix::Dense *orig, - matrix::Dense *row_gathered) + const Array* row_indices, + const matrix::Dense* orig, + matrix::Dense* row_gathered) { run_kernel( exec, @@ -277,9 +277,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* column_permuted) { run_kernel( exec, @@ -295,9 +295,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *row_permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* row_permuted) { run_kernel( exec, @@ -313,9 +313,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* column_permuted) { run_kernel( exec, @@ -331,8 +331,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Diagonal *diag) + const matrix::Dense* orig, + matrix::Diagonal* diag) { run_kernel( exec, @@ -345,7 +345,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); template void inplace_absolute_dense(std::shared_ptr exec, - matrix::Dense *source) + matrix::Dense* source) { run_kernel( exec, @@ -360,8 +360,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); template void 
outplace_absolute_dense(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { run_kernel( exec, @@ -376,8 +376,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); template void make_complex(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { run_kernel( exec, @@ -392,8 +392,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); template void get_real(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { run_kernel( exec, @@ -408,8 +408,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); template void get_imag(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { run_kernel( exec, diff --git a/common/unified/matrix/diagonal_kernels.cpp b/common/unified/matrix/diagonal_kernels.cpp index e39bd40e207..b6ad625c7ed 100644 --- a/common/unified/matrix/diagonal_kernels.cpp +++ b/common/unified/matrix/diagonal_kernels.cpp @@ -52,9 +52,9 @@ namespace diagonal { template void apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Diagonal* a, + const matrix::Dense* b, + matrix::Dense* c) { run_kernel( exec, @@ -69,9 +69,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); template void right_apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Diagonal* a, + const matrix::Dense* b, + matrix::Dense* c) { run_kernel( exec, @@ -87,9 +87,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void right_apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr 
*c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { // TODO: combine copy and diag apply together c->copy_from(b); @@ -108,8 +108,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Diagonal *source, - matrix::Csr *result) + const matrix::Diagonal* source, + matrix::Csr* result) { run_kernel( exec, @@ -133,8 +133,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Diagonal *orig, - matrix::Diagonal *trans) + const matrix::Diagonal* orig, + matrix::Diagonal* trans) { run_kernel( exec, diff --git a/common/unified/preconditioner/jacobi_kernels.cpp b/common/unified/preconditioner/jacobi_kernels.cpp index b7160a97617..bf25a9d649a 100644 --- a/common/unified/preconditioner/jacobi_kernels.cpp +++ b/common/unified/preconditioner/jacobi_kernels.cpp @@ -52,7 +52,7 @@ namespace jacobi { template void scalar_conj(std::shared_ptr exec, - const Array &diag, Array &conj_diag) + const Array& diag, Array& conj_diag) { run_kernel( exec, @@ -67,7 +67,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL); template void invert_diagonal(std::shared_ptr exec, - const Array &diag, Array &inv_diag) + const Array& diag, Array& inv_diag) { run_kernel( exec, @@ -82,11 +82,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL); template void scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) + const Array& diag, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* x) { if (alpha->get_size()[1] > 1) { run_kernel( @@ -116,9 +116,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); template void simple_scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *b, - matrix::Dense 
*x) + const Array& diag, + const matrix::Dense* b, + matrix::Dense* x) { run_kernel( exec, @@ -134,8 +134,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void scalar_convert_to_dense(std::shared_ptr exec, - const Array &blocks, - matrix::Dense *result) + const Array& blocks, + matrix::Dense* result) { run_kernel( exec, diff --git a/common/unified/solver/bicg_kernels.cpp b/common/unified/solver/bicg_kernels.cpp index 3f646b93819..95aea900ea4 100644 --- a/common/unified/solver/bicg_kernels.cpp +++ b/common/unified/solver/bicg_kernels.cpp @@ -52,13 +52,13 @@ namespace bicg { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *r2, - matrix::Dense *z2, matrix::Dense *p2, - matrix::Dense *q2, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* z, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* prev_rho, + matrix::Dense* rho, matrix::Dense* r2, + matrix::Dense* z2, matrix::Dense* p2, + matrix::Dense* q2, + Array* stop_status) { run_kernel_solver( exec, @@ -87,11 +87,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - matrix::Dense *p2, const matrix::Dense *z2, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) + matrix::Dense* p, const matrix::Dense* z, + matrix::Dense* p2, const matrix::Dense* z2, + const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const Array* stop_status) { run_kernel_solver( exec, @@ -113,13 +113,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *r2, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *q2, - const matrix::Dense *beta, - const matrix::Dense 
*rho, - const Array *stop_status) + matrix::Dense* x, matrix::Dense* r, + matrix::Dense* r2, const matrix::Dense* p, + const matrix::Dense* q, + const matrix::Dense* q2, + const matrix::Dense* beta, + const matrix::Dense* rho, + const Array* stop_status) { run_kernel_solver( exec, diff --git a/common/unified/solver/bicgstab_kernels.cpp b/common/unified/solver/bicgstab_kernels.cpp index a1d98b139e5..f5cbf984f34 100644 --- a/common/unified/solver/bicgstab_kernels.cpp +++ b/common/unified/solver/bicgstab_kernels.cpp @@ -52,15 +52,15 @@ namespace bicgstab { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *rr, matrix::Dense *y, - matrix::Dense *s, matrix::Dense *t, - matrix::Dense *z, matrix::Dense *v, - matrix::Dense *p, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *alpha, - matrix::Dense *beta, matrix::Dense *gamma, - matrix::Dense *omega, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* rr, matrix::Dense* y, + matrix::Dense* s, matrix::Dense* t, + matrix::Dense* z, matrix::Dense* v, + matrix::Dense* p, matrix::Dense* prev_rho, + matrix::Dense* rho, matrix::Dense* alpha, + matrix::Dense* beta, matrix::Dense* gamma, + matrix::Dense* omega, + Array* stop_status) { run_kernel_solver( exec, @@ -90,13 +90,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *p, - const matrix::Dense *v, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const matrix::Dense *alpha, - const matrix::Dense *omega, - const Array *stop_status) + const matrix::Dense* r, matrix::Dense* p, + const matrix::Dense* v, + const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const matrix::Dense* alpha, + const matrix::Dense* omega, + const Array* stop_status) { run_kernel_solver( exec, @@ -119,12 +119,12 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *s, - const matrix::Dense *v, - const matrix::Dense *rho, - matrix::Dense *alpha, - const matrix::Dense *beta, - const Array *stop_status) + const matrix::Dense* r, matrix::Dense* s, + const matrix::Dense* v, + const matrix::Dense* rho, + matrix::Dense* alpha, + const matrix::Dense* beta, + const Array* stop_status) { run_kernel_solver( exec, @@ -148,12 +148,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); template void step_3( - std::shared_ptr exec, matrix::Dense *x, - matrix::Dense *r, const matrix::Dense *s, - const matrix::Dense *t, const matrix::Dense *y, - const matrix::Dense *z, const matrix::Dense *alpha, - const matrix::Dense *beta, const matrix::Dense *gamma, - matrix::Dense *omega, const Array *stop_status) + std::shared_ptr exec, matrix::Dense* x, + matrix::Dense* r, const matrix::Dense* s, + const matrix::Dense* t, const matrix::Dense* y, + const matrix::Dense* z, const matrix::Dense* alpha, + const matrix::Dense* beta, const matrix::Dense* gamma, + matrix::Dense* omega, const Array* stop_status) { run_kernel_solver( exec, @@ -180,9 +180,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); template void finalize(std::shared_ptr exec, - matrix::Dense *x, const matrix::Dense *y, - const matrix::Dense *alpha, - Array *stop_status) + matrix::Dense* x, const matrix::Dense* y, + const matrix::Dense* alpha, + Array* stop_status) { run_kernel_solver( exec, diff --git a/common/unified/solver/cg_kernels.cpp b/common/unified/solver/cg_kernels.cpp index f47390aa3df..2001d9a9257 100644 --- a/common/unified/solver/cg_kernels.cpp +++ b/common/unified/solver/cg_kernels.cpp @@ -52,11 +52,11 @@ namespace cg { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, 
matrix::Dense *prev_rho, - matrix::Dense *rho, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* z, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* prev_rho, + matrix::Dense* rho, + Array* stop_status) { run_kernel_solver( exec, @@ -80,10 +80,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) + matrix::Dense* p, const matrix::Dense* z, + const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const Array* stop_status) { run_kernel_solver( exec, @@ -103,12 +103,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) + matrix::Dense* x, matrix::Dense* r, + const matrix::Dense* p, + const matrix::Dense* q, + const matrix::Dense* beta, + const matrix::Dense* rho, + const Array* stop_status) { run_kernel_solver( exec, diff --git a/common/unified/solver/cgs_kernels.cpp b/common/unified/solver/cgs_kernels.cpp index 948c52864f2..9ce24eaf514 100644 --- a/common/unified/solver/cgs_kernels.cpp +++ b/common/unified/solver/cgs_kernels.cpp @@ -52,16 +52,16 @@ namespace cgs { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *r_tld, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *u, - matrix::Dense *u_hat, - matrix::Dense *v_hat, matrix::Dense *t, - matrix::Dense *alpha, matrix::Dense *beta, - matrix::Dense *gamma, - matrix::Dense *prev_rho, - matrix::Dense *rho, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* r_tld, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* u, + matrix::Dense* u_hat, + matrix::Dense* 
v_hat, matrix::Dense* t, + matrix::Dense* alpha, matrix::Dense* beta, + matrix::Dense* gamma, + matrix::Dense* prev_rho, + matrix::Dense* rho, + Array* stop_status) { run_kernel_solver( exec, @@ -91,11 +91,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *u, - matrix::Dense *p, const matrix::Dense *q, - matrix::Dense *beta, const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) + const matrix::Dense* r, matrix::Dense* u, + matrix::Dense* p, const matrix::Dense* q, + matrix::Dense* beta, const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const Array* stop_status) { run_kernel_solver( exec, @@ -122,12 +122,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense *u, - const matrix::Dense *v_hat, matrix::Dense *q, - matrix::Dense *t, matrix::Dense *alpha, - const matrix::Dense *rho, - const matrix::Dense *gamma, - const Array *stop_status) + const matrix::Dense* u, + const matrix::Dense* v_hat, matrix::Dense* q, + matrix::Dense* t, matrix::Dense* alpha, + const matrix::Dense* rho, + const matrix::Dense* gamma, + const Array* stop_status) { run_kernel_solver( exec, @@ -152,10 +152,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, - const matrix::Dense *t, - const matrix::Dense *u_hat, matrix::Dense *r, - matrix::Dense *x, const matrix::Dense *alpha, - const Array *stop_status) + const matrix::Dense* t, + const matrix::Dense* u_hat, matrix::Dense* r, + matrix::Dense* x, const matrix::Dense* alpha, + const Array* stop_status) { run_kernel_solver( exec, diff --git a/common/unified/solver/fcg_kernels.cpp b/common/unified/solver/fcg_kernels.cpp index ba9587e350d..d715b0e0761 100644 --- a/common/unified/solver/fcg_kernels.cpp +++ b/common/unified/solver/fcg_kernels.cpp @@ 
-52,12 +52,12 @@ namespace fcg { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *t, - matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *rho_t, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* z, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* t, + matrix::Dense* prev_rho, + matrix::Dense* rho, matrix::Dense* rho_t, + Array* stop_status) { run_kernel_solver( exec, @@ -83,10 +83,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho_t, - const matrix::Dense *prev_rho, - const Array *stop_status) + matrix::Dense* p, const matrix::Dense* z, + const matrix::Dense* rho_t, + const matrix::Dense* prev_rho, + const Array* stop_status) { run_kernel_solver( exec, @@ -106,12 +106,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *t, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) + matrix::Dense* x, matrix::Dense* r, + matrix::Dense* t, const matrix::Dense* p, + const matrix::Dense* q, + const matrix::Dense* beta, + const matrix::Dense* rho, + const Array* stop_status) { run_kernel_solver( exec, diff --git a/common/unified/solver/ir_kernels.cpp b/common/unified/solver/ir_kernels.cpp index 3ba7c957ee3..0599a6865d5 100644 --- a/common/unified/solver/ir_kernels.cpp +++ b/common/unified/solver/ir_kernels.cpp @@ -48,7 +48,7 @@ namespace ir { void initialize(std::shared_ptr exec, - Array *stop_status) + Array* stop_status) { run_kernel( exec, [] GKO_KERNEL(auto i, auto stop) { stop[i].reset(); }, diff --git a/core/base/allocator.hpp b/core/base/allocator.hpp index 
791e525e037..60062e11794 100644 --- a/core/base/allocator.hpp +++ b/core/base/allocator.hpp @@ -86,7 +86,7 @@ class ExecutorAllocator { * @tparam U the element type of the allocator to be constructed. */ template - ExecutorAllocator(const ExecutorAllocator &other) + ExecutorAllocator(const ExecutorAllocator& other) : exec_{other.get_executor()} {} @@ -99,7 +99,7 @@ class ExecutorAllocator { * @param n the number of elements to allocate * @return the pointer to a newly allocated memory area of `n` elements. */ - T *allocate(std::size_t n) const { return exec_->alloc(n); } + T* allocate(std::size_t n) const { return exec_->alloc(n); } /** * Frees a memory area that was allocated by this allocator. @@ -108,7 +108,7 @@ class ExecutorAllocator { * * @note The second parameter is unused. */ - void deallocate(T *ptr, std::size_t) const { exec_->free(ptr); } + void deallocate(T* ptr, std::size_t) const { exec_->free(ptr); } /** * Compares two ExecutorAllocators for equality @@ -118,8 +118,8 @@ class ExecutorAllocator { * @return true iff the two allocators use the same executor */ template - friend bool operator==(const ExecutorAllocator &l, - const ExecutorAllocator &r) + friend bool operator==(const ExecutorAllocator& l, + const ExecutorAllocator& r) { return l.get_executor() == r.get_executor(); } @@ -132,8 +132,8 @@ class ExecutorAllocator { * @return true iff the two allocators use different executors */ template - friend bool operator!=(const ExecutorAllocator &l, - const ExecutorAllocator &r) + friend bool operator!=(const ExecutorAllocator& l, + const ExecutorAllocator& r) { return !(l == r); } diff --git a/core/base/array.cpp b/core/base/array.cpp index 3a2ad7b5568..6057477258f 100644 --- a/core/base/array.cpp +++ b/core/base/array.cpp @@ -68,7 +68,7 @@ namespace detail { template void convert_data(std::shared_ptr exec, size_type size, - const SourceType *src, TargetType *dst) + const SourceType* src, TargetType* dst) { exec->run(conversion::make_convert(size, 
src, dst)); } @@ -76,7 +76,7 @@ void convert_data(std::shared_ptr exec, size_type size, #define GKO_DECLARE_ARRAY_CONVERSION(From, To) \ void convert_data(std::shared_ptr, size_type, \ - const From *, To *) + const From*, To*) GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION(GKO_DECLARE_ARRAY_CONVERSION); diff --git a/core/base/combination.cpp b/core/base/combination.cpp index 5051f43565a..258e30f6813 100644 --- a/core/base/combination.cpp +++ b/core/base/combination.cpp @@ -43,8 +43,8 @@ namespace { template inline void initialize_scalars(std::shared_ptr exec, - std::unique_ptr &zero, - std::unique_ptr &one) + std::unique_ptr& zero, + std::unique_ptr& one) { if (zero == nullptr) { zero = initialize>({gko::zero()}, @@ -66,11 +66,11 @@ std::unique_ptr Combination::transpose() const auto transposed = Combination::create(this->get_executor()); transposed->set_size(gko::transpose(this->get_size())); // copy coefficients - for (auto &coef : get_coefficients()) { + for (auto& coef : get_coefficients()) { transposed->coefficients_.push_back(share(coef->clone())); } // transpose operators - for (auto &op : get_operators()) { + for (auto& op : get_operators()) { transposed->operators_.push_back( share(as(op)->transpose())); } @@ -85,12 +85,12 @@ std::unique_ptr Combination::conj_transpose() const auto transposed = Combination::create(this->get_executor()); transposed->set_size(gko::transpose(this->get_size())); // conjugate coefficients! 
- for (auto &coef : get_coefficients()) { + for (auto& coef : get_coefficients()) { transposed->coefficients_.push_back( share(as(coef)->conj_transpose())); } // conjugate-transpose operators - for (auto &op : get_operators()) { + for (auto& op : get_operators()) { transposed->operators_.push_back( share(as(op)->conj_transpose())); } @@ -100,7 +100,7 @@ std::unique_ptr Combination::conj_transpose() const template -void Combination::apply_impl(const LinOp *b, LinOp *x) const +void Combination::apply_impl(const LinOp* b, LinOp* x) const { initialize_scalars(this->get_executor(), cache_.zero, cache_.one); @@ -118,8 +118,8 @@ void Combination::apply_impl(const LinOp *b, LinOp *x) const template -void Combination::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Combination::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/base/composition.cpp b/core/base/composition.cpp index 3a67a8ebff6..ec5d3b3aaf1 100644 --- a/core/base/composition.cpp +++ b/core/base/composition.cpp @@ -58,8 +58,8 @@ GKO_REGISTER_OPERATION(fill_array, components::fill_array); template std::unique_ptr apply_inner_operators( - const std::vector> &operators, - Array &storage, const LinOp *rhs) + const std::vector>& operators, + Array& storage, const LinOp* rhs) { using Dense = matrix::Dense; // determine amount of necessary storage: @@ -140,7 +140,7 @@ std::unique_ptr Composition::transpose() const // transpose and reverse operators std::transform(this->get_operators().rbegin(), this->get_operators().rend(), std::back_inserter(transposed->operators_), - [](const std::shared_ptr &op) { + [](const std::shared_ptr& op) { return share(as(op)->transpose()); }); @@ -156,7 +156,7 @@ std::unique_ptr Composition::conj_transpose() const // conjugate-transpose and reverse operators 
std::transform(this->get_operators().rbegin(), this->get_operators().rend(), std::back_inserter(transposed->operators_), - [](const std::shared_ptr &op) { + [](const std::shared_ptr& op) { return share(as(op)->conj_transpose()); }); @@ -165,7 +165,7 @@ std::unique_ptr Composition::conj_transpose() const template -void Composition::apply_impl(const LinOp *b, LinOp *x) const +void Composition::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -182,8 +182,8 @@ void Composition::apply_impl(const LinOp *b, LinOp *x) const template -void Composition::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Composition::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/base/executor.cpp b/core/base/executor.cpp index 109dc6601ed..6c31b362fd6 100644 --- a/core/base/executor.cpp +++ b/core/base/executor.cpp @@ -63,7 +63,7 @@ void Operation::run(std::shared_ptr executor) const } -const char *Operation::get_name() const noexcept +const char* Operation::get_name() const noexcept { static auto name = name_demangling::get_dynamic_type(*this); return name.c_str(); diff --git a/core/base/extended_float.hpp b/core/base/extended_float.hpp index 34b3eed0d59..9175ab5a877 100644 --- a/core/base/extended_float.hpp +++ b/core/base/extended_float.hpp @@ -316,9 +316,9 @@ class half { { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) const auto tmp = __float2half_rn(val); - data_ = reinterpret_cast(tmp); + data_ = reinterpret_cast(tmp); #else // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - data_ = float2half(reinterpret_cast(val)); + data_ = float2half(reinterpret_cast(val)); #endif // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) } @@ -328,10 +328,10 @@ class half { GKO_ATTRIBUTES operator 
float32() const noexcept { #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) - return __half2float(reinterpret_cast(data_)); + return __half2float(reinterpret_cast(data_)); #else // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) const auto bits = half2float(data_); - return reinterpret_cast(bits); + return reinterpret_cast(bits); #endif // defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) } @@ -453,9 +453,9 @@ class truncated { truncated() noexcept = default; - GKO_ATTRIBUTES explicit truncated(const float_type &val) noexcept + GKO_ATTRIBUTES explicit truncated(const float_type& val) noexcept { - const auto &bits = reinterpret_cast(val); + const auto& bits = reinterpret_cast(val); data_ = static_cast((bits & component_mask) >> component_position); } @@ -464,7 +464,7 @@ class truncated { { const auto bits = static_cast(data_) << component_position; - return reinterpret_cast(bits); + return reinterpret_cast(bits); } GKO_ATTRIBUTES truncated operator-() const noexcept @@ -493,12 +493,12 @@ class complex { public: using value_type = gko::half; - complex(const value_type &real = 0.f, const value_type &imag = 0.f) + complex(const value_type& real = 0.f, const value_type& imag = 0.f) : real_(real), imag_(imag) {} template - explicit complex(const complex &other) + explicit complex(const complex& other) : complex(static_cast(other.real()), static_cast(other.imag())) {} @@ -525,12 +525,12 @@ class complex> { public: using value_type = gko::truncated; - complex(const value_type &real = 0.f, const value_type &imag = 0.f) + complex(const value_type& real = 0.f, const value_type& imag = 0.f) : real_(real), imag_(imag) {} template - explicit complex(const complex &other) + explicit complex(const complex& other) : complex(static_cast(other.real()), static_cast(other.imag())) {} diff --git a/core/base/iterator_factory.hpp b/core/base/iterator_factory.hpp index 0f5450fb907..4215b9dc6da 100644 --- a/core/base/iterator_factory.hpp +++ 
b/core/base/iterator_factory.hpp @@ -87,7 +87,7 @@ class IteratorFactory { ToSortType dominant; SecondaryType secondary; - friend bool operator<(const element &left, const element &right) + friend bool operator<(const element& left, const element& right) { return left.dominant < right.dominant; } @@ -110,28 +110,28 @@ class IteratorFactory { ~Reference() {} - Reference(IteratorFactory *parent, array_index_type array_index) + Reference(IteratorFactory* parent, array_index_type array_index) : parent_(parent), arr_index_(array_index) {} // Since it must be `MoveConstructible` - Reference(Reference &&other) + Reference(Reference&& other) : parent_(other.parent_), arr_index_(std::move(other.arr_index_)) {} - Reference(const Reference &other) + Reference(const Reference& other) : parent_(other.parent_), arr_index_(other.arr_index_) {} - Reference &operator=(element other) + Reference& operator=(element other) { dominant() = other.dominant; secondary() = other.secondary; return *this; } - Reference &operator=(const Reference &other) + Reference& operator=(const Reference& other) { dominant() = other.dominant(); secondary() = other.secondary(); @@ -139,7 +139,7 @@ class IteratorFactory { } // Since it must be `MoveAssignable` - Reference &operator=(Reference &&other) + Reference& operator=(Reference&& other) { // In C++11, it is legal for a nested class to access private // members of the parent class. 
@@ -159,40 +159,40 @@ class IteratorFactory { std::swap(a.secondary(), b.secondary()); } - friend bool operator<(const Reference &left, const Reference &right) + friend bool operator<(const Reference& left, const Reference& right) { return left.dominant() < right.dominant(); } - friend bool operator<(const Reference &left, const element &right) + friend bool operator<(const Reference& left, const element& right) { return left.dominant() < right.dominant; } - friend bool operator<(const element &left, const Reference &right) + friend bool operator<(const element& left, const Reference& right) { return left.dominant < right.dominant(); } - ToSortType &dominant() { return parent_->dominant_values_[arr_index_]; } + ToSortType& dominant() { return parent_->dominant_values_[arr_index_]; } - const ToSortType &dominant() const + const ToSortType& dominant() const { return parent_->dominant_values_[arr_index_]; } - SecondaryType &secondary() + SecondaryType& secondary() { return parent_->secondary_values_[arr_index_]; } - const SecondaryType &secondary() const + const SecondaryType& secondary() const { return parent_->secondary_values_[arr_index_]; } private: - IteratorFactory *parent_; + IteratorFactory* parent_; array_index_type arr_index_; }; @@ -218,15 +218,15 @@ class IteratorFactory { ~Iterator() {} - Iterator(IteratorFactory *parent, difference_type array_index) + Iterator(IteratorFactory* parent, difference_type array_index) : parent_(parent), arr_index_(array_index) {} - Iterator(const Iterator &other) + Iterator(const Iterator& other) : parent_(other.parent_), arr_index_(other.arr_index_) {} - Iterator &operator=(const Iterator &other) + Iterator& operator=(const Iterator& other) { arr_index_ = other.arr_index_; return *this; @@ -234,19 +234,19 @@ class IteratorFactory { // Operators needed for the std::sort requirement of // `LegacyRandomAccessIterator` - Iterator &operator+=(difference_type i) + Iterator& operator+=(difference_type i) { arr_index_ += i; return 
*this; } - Iterator &operator-=(difference_type i) + Iterator& operator-=(difference_type i) { arr_index_ -= i; return *this; } - Iterator &operator++() // Prefix increment (++i) + Iterator& operator++() // Prefix increment (++i) { ++arr_index_; return *this; @@ -259,7 +259,7 @@ class IteratorFactory { return temp; } - Iterator &operator--() // Prefix decrement (--i) + Iterator& operator--() // Prefix decrement (--i) { --arr_index_; return *this; @@ -277,7 +277,7 @@ class IteratorFactory { return {parent_, arr_index_ + i}; } - friend Iterator operator+(difference_type i, const Iterator &iter) + friend Iterator operator+(difference_type i, const Iterator& iter) { return {iter.parent_, iter.arr_index_ + i}; } @@ -287,7 +287,7 @@ class IteratorFactory { return {parent_, arr_index_ - i}; } - difference_type operator-(const Iterator &other) const + difference_type operator-(const Iterator& other) const { return arr_index_ - other.arr_index_; } @@ -300,38 +300,38 @@ class IteratorFactory { } // Comparable operators - bool operator==(const Iterator &other) const + bool operator==(const Iterator& other) const { return arr_index_ == other.arr_index_; } - bool operator!=(const Iterator &other) const + bool operator!=(const Iterator& other) const { return arr_index_ != other.arr_index_; } - bool operator<(const Iterator &other) const + bool operator<(const Iterator& other) const { return arr_index_ < other.arr_index_; } - bool operator<=(const Iterator &other) const + bool operator<=(const Iterator& other) const { return arr_index_ <= other.arr_index_; } - bool operator>(const Iterator &other) const + bool operator>(const Iterator& other) const { return arr_index_ > other.arr_index_; } - bool operator>=(const Iterator &other) const + bool operator>=(const Iterator& other) const { return arr_index_ >= other.arr_index_; } private: - IteratorFactory *parent_{}; + IteratorFactory* parent_{}; difference_type arr_index_{}; }; @@ -354,8 +354,8 @@ class IteratorFactory { * @note Both 
arrays must have at least `size` elements, otherwise, the * behaviour is undefined. */ - IteratorFactory(ToSortType *dominant_values, - SecondaryType *secondary_values, size_type size) + IteratorFactory(ToSortType* dominant_values, + SecondaryType* secondary_values, size_type size) : dominant_values_(dominant_values), secondary_values_(secondary_values), size_(size) @@ -377,8 +377,8 @@ class IteratorFactory { } private: - ToSortType *dominant_values_; - SecondaryType *secondary_values_; + ToSortType* dominant_values_; + SecondaryType* secondary_values_; size_type size_; }; diff --git a/core/base/mtx_io.cpp b/core/base/mtx_io.cpp index fea768d8c88..42bdb06a153 100644 --- a/core/base/mtx_io.cpp +++ b/core/base/mtx_io.cpp @@ -77,7 +77,7 @@ class mtx_io { * * @return an instance of the matrix. */ - static const mtx_io &get() + static const mtx_io& get() { static mtx_io instance; return instance; @@ -90,7 +90,7 @@ class mtx_io { * * @return the matrix data. */ - matrix_data read(std::istream &is) const + matrix_data read(std::istream& is) const { auto parsed_header = this->read_header(is); std::istringstream dimensions_stream(parsed_header.dimensions_line); @@ -107,8 +107,8 @@ class mtx_io { * @param data the matrix data to be written. * @param header the header to be printed at the start of the file. 
*/ - void write(std::ostream &os, const matrix_data &data, - const std::string &header) const + void write(std::ostream& os, const matrix_data& data, + const std::string& header) const { std::istringstream header_stream(header); auto parsed_header = this->read_description_line(header_stream); @@ -124,15 +124,15 @@ class mtx_io { * entry of the matrix, depending on its storage scheme: */ struct entry_format { - virtual ValueType read_entry(std::istream &is) const = 0; - virtual void write_entry(std::ostream &os, - const ValueType &value) const = 0; + virtual ValueType read_entry(std::istream& is) const = 0; + virtual void write_entry(std::ostream& os, + const ValueType& value) const = 0; }; /** * maps entry format specification strings to algorithms */ - std::map format_map; + std::map format_map; /** * the value is encoded as a decimal number @@ -145,7 +145,7 @@ class mtx_io { * * @return the matrix entry. */ - ValueType read_entry(std::istream &is) const override + ValueType read_entry(std::istream& is) const override { double result{}; GKO_CHECK_STREAM(is >> result, "error while reading matrix entry"); @@ -158,8 +158,8 @@ class mtx_io { * @param os the output stream * @param value the matrix entry to be written */ - void write_entry(std::ostream &os, - const ValueType &value) const override + void write_entry(std::ostream& os, + const ValueType& value) const override { write_entry_impl(os, value); } @@ -167,7 +167,7 @@ class mtx_io { private: template static std::enable_if_t::value> write_entry_impl( - std::ostream &, const T &) + std::ostream&, const T&) { throw GKO_STREAM_ERROR( "trying to write a complex matrix into a real entry format"); @@ -175,7 +175,7 @@ class mtx_io { template static std::enable_if_t::value> write_entry_impl( - std::ostream &os, const T &value) + std::ostream& os, const T& value) { GKO_CHECK_STREAM(os << static_cast(value), "error while writing matrix entry"); @@ -194,7 +194,7 @@ class mtx_io { * * @return the matrix entry. 
*/ - ValueType read_entry(std::istream &is) const override + ValueType read_entry(std::istream& is) const override { return read_entry_impl(is); } @@ -205,8 +205,8 @@ class mtx_io { * @param os the output stream * @param value the matrix entry to be written */ - void write_entry(std::ostream &os, - const ValueType &value) const override + void write_entry(std::ostream& os, + const ValueType& value) const override { GKO_CHECK_STREAM(os << static_cast(real(value)) << ' ' << static_cast(imag(value)), @@ -216,7 +216,7 @@ class mtx_io { private: template static std::enable_if_t::value, T> read_entry_impl( - std::istream &is) + std::istream& is) { using real_type = remove_complex; double real{}; @@ -228,7 +228,7 @@ class mtx_io { template static std::enable_if_t::value, T> read_entry_impl( - std::istream &) + std::istream&) { throw GKO_STREAM_ERROR( "trying to read a complex matrix into a real storage type"); @@ -247,7 +247,7 @@ class mtx_io { * * @return the matrix entry(one). */ - ValueType read_entry(std::istream &) const override + ValueType read_entry(std::istream&) const override { return one(); } @@ -258,7 +258,7 @@ class mtx_io { * @param dummy output stream * @param dummy matrix entry to be written */ - void write_entry(std::ostream &, const ValueType &) const override {} + void write_entry(std::ostream&, const ValueType&) const override {} } pattern_format{}; @@ -274,8 +274,8 @@ class mtx_io { size_type num_nonzeros) const = 0; virtual void insert_entry( - const IndexType &row, const IndexType &col, const ValueType &entry, - matrix_data &data) const = 0; + const IndexType& row, const IndexType& col, const ValueType& entry, + matrix_data& data) const = 0; virtual size_type get_row_start(size_type col) const = 0; }; @@ -283,7 +283,7 @@ class mtx_io { /** * maps storage modifier specification strings to algorithms */ - std::map modifier_map; + std::map modifier_map; /** * all (nonzero) elements of the matrix are stored @@ -313,8 +313,8 @@ class mtx_io { * @param 
data the data holding the matrix. */ void insert_entry( - const IndexType &row, const IndexType &col, const ValueType &entry, - matrix_data &data) const override + const IndexType& row, const IndexType& col, const ValueType& entry, + matrix_data& data) const override { data.nonzeros.emplace_back(row, col, entry); } @@ -353,8 +353,8 @@ class mtx_io { * @param data the data holding the matrix. */ void insert_entry( - const IndexType &row, const IndexType &col, const ValueType &entry, - matrix_data &data) const override + const IndexType& row, const IndexType& col, const ValueType& entry, + matrix_data& data) const override { data.nonzeros.emplace_back(row, col, entry); if (row != col) { @@ -398,8 +398,8 @@ class mtx_io { * @param data the data holding the matrix. */ void insert_entry( - const IndexType &row, const IndexType &col, const ValueType &entry, - matrix_data &data) const override + const IndexType& row, const IndexType& col, const ValueType& entry, + matrix_data& data) const override { data.nonzeros.emplace_back(row, col, entry); data.nonzeros.emplace_back(col, row, -entry); @@ -443,8 +443,8 @@ class mtx_io { * @param data the data holding the matrix. */ void insert_entry( - const IndexType &row, const IndexType &col, const ValueType &entry, - matrix_data &data) const override + const IndexType& row, const IndexType& col, const ValueType& entry, + matrix_data& data) const override { data.nonzeros.emplace_back(row, col, entry); if (row != col) { @@ -476,9 +476,9 @@ class mtx_io { * @return the matrix data */ virtual matrix_data read_data( - std::istream &header, std::istream &content, - const entry_format *entry_reader, - const storage_modifier *modifier) const = 0; + std::istream& header, std::istream& content, + const entry_format* entry_reader, + const storage_modifier* modifier) const = 0; /** * Write the matrix data * @@ -487,16 +487,16 @@ class mtx_io { * @param entry_writer The entry format to write in. 
* @param modifier The strorage modifer */ - virtual void write_data(std::ostream &os, - const matrix_data &data, - const entry_format *entry_writer, - const storage_modifier *modifier) const = 0; + virtual void write_data(std::ostream& os, + const matrix_data& data, + const entry_format* entry_writer, + const storage_modifier* modifier) const = 0; }; /** * maps storage layout specification strings to algorithms */ - std::map layout_map; + std::map layout_map; /** * the matrix is sparse, and every nonzero is stored together with its @@ -514,9 +514,9 @@ class mtx_io { * @return the matrix data */ matrix_data read_data( - std::istream &header, std::istream &content, - const entry_format *entry_reader, - const storage_modifier *modifier) const override + std::istream& header, std::istream& content, + const entry_format* entry_reader, + const storage_modifier* modifier) const override { size_type num_rows{}; size_type num_cols{}; @@ -550,16 +550,16 @@ class mtx_io { * @param entry_writer The entry format to write in. 
* @param modifier The strorage modifer */ - void write_data(std::ostream &os, - const matrix_data &data, - const entry_format *entry_writer, - const storage_modifier *) const override + void write_data(std::ostream& os, + const matrix_data& data, + const entry_format* entry_writer, + const storage_modifier*) const override { // TODO: use the storage modifier GKO_CHECK_STREAM(os << data.size[0] << ' ' << data.size[1] << ' ' << data.nonzeros.size() << '\n', "error when writing size information"); - for (const auto &nonzero : data.nonzeros) { + for (const auto& nonzero : data.nonzeros) { GKO_CHECK_STREAM( os << nonzero.row + 1 << ' ' << nonzero.column + 1 << ' ', "error when writing matrix index"); @@ -585,9 +585,9 @@ class mtx_io { * @return the matrix data */ matrix_data read_data( - std::istream &header, std::istream &content, - const entry_format *entry_reader, - const storage_modifier *modifier) const override + std::istream& header, std::istream& content, + const entry_format* entry_reader, + const storage_modifier* modifier) const override { size_type num_rows{}; size_type num_cols{}; @@ -619,10 +619,10 @@ class mtx_io { * @param entry_writer The entry format to write in. 
* @param modifier The strorage modifer */ - void write_data(std::ostream &os, - const matrix_data &data, - const entry_format *entry_writer, - const storage_modifier *) const override + void write_data(std::ostream& os, + const matrix_data& data, + const entry_format* entry_writer, + const storage_modifier*) const override { using nt = typename matrix_data::nonzero_type; auto nonzeros = data.nonzeros; @@ -673,9 +673,9 @@ class mtx_io { * read/write the rest of the file */ struct header_data { - const entry_format *entry{}; - const storage_modifier *modifier{}; - const storage_layout *layout{}; + const entry_format* entry{}; + const storage_modifier* modifier{}; + const storage_layout* layout{}; std::string dimensions_line{}; }; @@ -686,7 +686,7 @@ class mtx_io { * * @return the data containing the description */ - header_data read_description_line(std::istream &is) const + header_data read_description_line(std::istream& is) const { header_data data{}; @@ -731,7 +731,7 @@ class mtx_io { * * @return the header data */ - header_data read_header(std::istream &is) const + header_data read_header(std::istream& is) const { auto data = read_description_line(is); do { @@ -754,7 +754,7 @@ class mtx_io { * @return matrix_data the matrix data. */ template -matrix_data read_raw(std::istream &is) +matrix_data read_raw(std::istream& is) { return mtx_io::get().read(is); } @@ -768,7 +768,7 @@ matrix_data read_raw(std::istream &is) * @param layout the layout type which the data should be written in. 
*/ template -void write_raw(std::ostream &os, const matrix_data &data, +void write_raw(std::ostream& os, const matrix_data& data, layout_type layout) { // TODO: add support for all layout combinations @@ -781,10 +781,10 @@ void write_raw(std::ostream &os, const matrix_data &data, #define GKO_DECLARE_READ_RAW(ValueType, IndexType) \ - matrix_data read_raw(std::istream &is) + matrix_data read_raw(std::istream& is) #define GKO_DECLARE_WRITE_RAW(ValueType, IndexType) \ - void write_raw(std::ostream &os, \ - const matrix_data &data, \ + void write_raw(std::ostream& os, \ + const matrix_data& data, \ layout_type layout) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_READ_RAW); GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_WRITE_RAW); diff --git a/core/base/perturbation.cpp b/core/base/perturbation.cpp index c7fead043d6..f82cd70f1d5 100644 --- a/core/base/perturbation.cpp +++ b/core/base/perturbation.cpp @@ -41,7 +41,7 @@ namespace gko { template -void Perturbation::apply_impl(const LinOp *b, LinOp *x) const +void Perturbation::apply_impl(const LinOp* b, LinOp* x) const { // x = (I + scalar * basis * projector) * b // temp = projector * b : projector->apply(b, temp) @@ -63,8 +63,8 @@ void Perturbation::apply_impl(const LinOp *b, LinOp *x) const template -void Perturbation::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Perturbation::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { // x = alpha * (I + scalar * basis * projector) b + beta * x // = beta * x + alpha * b + alpha * scalar * basis * projector * b diff --git a/core/base/types.hpp b/core/base/types.hpp index a820ddfd91e..f982e22fde0 100644 --- a/core/base/types.hpp +++ b/core/base/types.hpp @@ -81,7 +81,7 @@ constexpr std::enable_if_t mask() */ template constexpr std::enable_if_t<(num_groups == current_shift + 1), int> shift( - const std::array &bits) + const std::array& bits) { return 0; } @@ -93,7 +93,7 @@ 
constexpr std::enable_if_t<(num_groups == current_shift + 1), int> shift( */ template constexpr std::enable_if_t<(num_groups > current_shift + 1), int> shift( - const std::array &bits) + const std::array& bits) { return bits[current_shift + 1] + shift<(current_shift + 1), num_groups>(bits); @@ -190,7 +190,7 @@ class ConfigSet { template static constexpr std::enable_if_t<(current_iter < num_groups), std::uint32_t> - encode(std::uint32_t first, Rest &&... rest) + encode(std::uint32_t first, Rest&&... rest) { constexpr int shift = detail::shift(bits); if (current_iter == 0) { diff --git a/core/base/utils.hpp b/core/base/utils.hpp index bf09996ed3b..911945cef78 100644 --- a/core/base/utils.hpp +++ b/core/base/utils.hpp @@ -51,7 +51,7 @@ namespace kernels { template -GKO_ATTRIBUTES GKO_INLINE ValueType checked_load(const ValueType *p, +GKO_ATTRIBUTES GKO_INLINE ValueType checked_load(const ValueType* p, IndexType i, IndexType size, ValueType sentinel) { @@ -73,7 +73,7 @@ struct conversion_sort_helper> { using mtx_type = matrix::Csr; template static std::unique_ptr get_sorted_conversion( - std::shared_ptr &exec, Source *source) + std::shared_ptr& exec, Source* source) { auto editable_mtx = mtx_type::create(exec); as>(source)->convert_to(lend(editable_mtx)); @@ -84,8 +84,8 @@ struct conversion_sort_helper> { template -std::unique_ptr> convert_to_with_sorting_impl( - std::shared_ptr &exec, Source *obj, bool skip_sorting) +std::unique_ptr> convert_to_with_sorting_impl( + std::shared_ptr& exec, Source* obj, bool skip_sorting) { if (skip_sorting) { return copy_and_convert_to(exec, obj); @@ -100,7 +100,7 @@ std::unique_ptr> convert_to_with_sorting_impl( template std::shared_ptr convert_to_with_sorting_impl( - std::shared_ptr &exec, std::shared_ptr obj, + std::shared_ptr& exec, std::shared_ptr obj, bool skip_sorting) { if (skip_sorting) { @@ -139,8 +139,8 @@ std::shared_ptr convert_to_with_sorting_impl( * not */ template -std::unique_ptr> convert_to_with_sorting( - 
std::shared_ptr exec, Source *obj, bool skip_sorting) +std::unique_ptr> convert_to_with_sorting( + std::shared_ptr exec, Source* obj, bool skip_sorting) { return detail::convert_to_with_sorting_impl(exec, obj, skip_sorting); } @@ -153,8 +153,8 @@ std::unique_ptr> convert_to_with_sorting( * also const */ template -std::unique_ptr> -convert_to_with_sorting(std::shared_ptr exec, const Source *obj, +std::unique_ptr> +convert_to_with_sorting(std::shared_ptr exec, const Source* obj, bool skip_sorting) { return detail::convert_to_with_sorting_impl(exec, obj, @@ -168,8 +168,8 @@ convert_to_with_sorting(std::shared_ptr exec, const Source *obj, * @note This version has a unique_ptr as the source instead of a plain pointer */ template -std::unique_ptr> convert_to_with_sorting( - std::shared_ptr exec, const std::unique_ptr &obj, +std::unique_ptr> convert_to_with_sorting( + std::shared_ptr exec, const std::unique_ptr& obj, bool skip_sorting) { return detail::convert_to_with_sorting_impl(exec, obj.get(), diff --git a/core/base/version.cpp b/core/base/version.cpp index 1cc7a8c849b..11c486587af 100644 --- a/core/base/version.cpp +++ b/core/base/version.cpp @@ -45,9 +45,9 @@ version version_info::get_core_version() noexcept } -std::ostream &operator<<(std::ostream &os, const version_info &ver_info) +std::ostream& operator<<(std::ostream& os, const version_info& ver_info) { - auto print_version = [](std::ostream &os, const version &ver) -> void { + auto print_version = [](std::ostream& os, const version& ver) -> void { static const std::string not_compiled_tag = "not compiled"; if (ver.tag == not_compiled_tag) { os << "not compiled"; diff --git a/core/components/absolute_array.hpp b/core/components/absolute_array.hpp index 5f66c89254f..122b5464d55 100644 --- a/core/components/absolute_array.hpp +++ b/core/components/absolute_array.hpp @@ -48,12 +48,12 @@ namespace kernels { #define GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) \ void inplace_absolute_array(std::shared_ptr 
exec, \ - ValueType *data, size_type num_entries) + ValueType* data, size_type num_entries) #define GKO_DECLARE_OUTPLACE_ABSOLUTE_ARRAY_KERNEL(ValueType) \ void outplace_absolute_array(std::shared_ptr exec, \ - const ValueType *in, size_type num_entries, \ - remove_complex *out) + const ValueType* in, size_type num_entries, \ + remove_complex* out) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/components/fill_array.hpp b/core/components/fill_array.hpp index 0845f03701f..0faf19d1bae 100644 --- a/core/components/fill_array.hpp +++ b/core/components/fill_array.hpp @@ -47,11 +47,11 @@ namespace kernels { #define GKO_DECLARE_FILL_ARRAY_KERNEL(ValueType) \ void fill_array(std::shared_ptr exec, \ - ValueType *data, size_type num_entries, ValueType val) + ValueType* data, size_type num_entries, ValueType val) #define GKO_DECLARE_FILL_SEQ_ARRAY_KERNEL(ValueType) \ void fill_seq_array(std::shared_ptr exec, \ - ValueType *data, size_type num_entries) + ValueType* data, size_type num_entries) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/components/precision_conversion.hpp b/core/components/precision_conversion.hpp index 9f21b59d2dd..cc69d25cfd4 100644 --- a/core/components/precision_conversion.hpp +++ b/core/components/precision_conversion.hpp @@ -48,8 +48,8 @@ namespace kernels { #define GKO_DECLARE_CONVERT_PRECISION_KERNEL(SourceType, TargetType) \ void convert_precision(std::shared_ptr exec, \ - size_type size, const SourceType *in, \ - TargetType *out) + size_type size, const SourceType* in, \ + TargetType* out) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/components/prefix_sum.hpp b/core/components/prefix_sum.hpp index b0cdf34018d..557ce0c358b 100644 --- a/core/components/prefix_sum.hpp +++ b/core/components/prefix_sum.hpp @@ -63,7 +63,7 @@ namespace kernels { */ #define GKO_DECLARE_PREFIX_SUM_KERNEL(IndexType) \ void prefix_sum(std::shared_ptr exec, \ - IndexType *counts, size_type num_entries) + IndexType* counts, size_type 
num_entries) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index a58ef92e5ab..21f9c63ed04 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -63,18 +63,18 @@ std::shared_ptr CudaExecutor::create( } -void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo) +void CudaExecutor::populate_exec_info(const MachineTopology* mach_topo) { // This method is always called, so cannot throw when not compiled. } -void OmpExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const CudaExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_free(void *ptr) const noexcept +void CudaExecutor::raw_free(void* ptr) const noexcept { // Free must never fail, as it can be called in destructors. // If the nvidia module was not compiled, the library couldn't have @@ -82,33 +82,33 @@ void CudaExecutor::raw_free(void *ptr) const noexcept } -void *CudaExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(cuda); +void* CudaExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const CudaExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, + const void* 
src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(cuda); void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); -void CudaExecutor::run(const Operation &op) const +void CudaExecutor::run(const Operation& op) const { op.run( std::static_pointer_cast(this->shared_from_this())); diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 65ca04dd840..c8bd57ab95e 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -59,18 +59,18 @@ std::shared_ptr DpcppExecutor::create( } -void DpcppExecutor::populate_exec_info(const MachineTopology *mach_topo) +void DpcppExecutor::populate_exec_info(const MachineTopology* mach_topo) { // This method is always called, so cannot throw when not compiled. } -void OmpExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(dpcpp); -bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const { // Dummy check auto dev_type = dest_exec->get_device_type(); @@ -78,7 +78,7 @@ bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const } -void DpcppExecutor::raw_free(void *ptr) const noexcept +void DpcppExecutor::raw_free(void* ptr) const noexcept { // Free must never fail, as it can be called in destructors. 
// If the nvidia module was not compiled, the library couldn't have @@ -86,34 +86,34 @@ void DpcppExecutor::raw_free(void *ptr) const noexcept } -void *DpcppExecutor::raw_alloc(size_type num_bytes) const +void* DpcppExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(dpcpp); -void DpcppExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(dpcpp); -void DpcppExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const CudaExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(dpcpp); -void DpcppExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(dpcpp); -void DpcppExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(dpcpp); void DpcppExecutor::synchronize() const GKO_NOT_COMPILED(dpcpp); -void DpcppExecutor::run(const Operation &op) const +void DpcppExecutor::run(const Operation& op) const { op.run(std::static_pointer_cast( this->shared_from_this())); @@ -126,14 +126,14 @@ int DpcppExecutor::get_num_devices(std::string) { return 0; } void DpcppExecutor::set_device_property() {} -bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const +bool DpcppExecutor::verify_memory_to(const OmpExecutor* dest_exec) const { // Dummy check return this->get_device_type() == "cpu" || this->get_device_type() == "host"; } -bool DpcppExecutor::verify_memory_to(const 
DpcppExecutor *dest_exec) const +bool DpcppExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const { // Dummy check return dest_exec->get_device_type() == this->get_device_type() && diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 232a86b803b..76c1fb543cf 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -60,18 +60,18 @@ std::shared_ptr HipExecutor::create( } -void HipExecutor::populate_exec_info(const MachineTopology *mach_topo) +void HipExecutor::populate_exec_info(const MachineTopology* mach_topo) { // This method is always called, so cannot throw when not compiled. } -void OmpExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_free(void *ptr) const noexcept +void HipExecutor::raw_free(void* ptr) const noexcept { // Free must never fail, as it can be called in destructors. 
// If the nvidia module was not compiled, the library couldn't have @@ -79,33 +79,33 @@ void HipExecutor::raw_free(void *ptr) const noexcept } -void *HipExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(hip); +void* HipExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const CudaExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const CudaExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const HipExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const DpcppExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(hip); void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip); -void HipExecutor::run(const Operation &op) const +void HipExecutor::run(const Operation& op) const { op.run( std::static_pointer_cast(this->shared_from_this())); diff --git a/core/factorization/factorization_kernels.hpp b/core/factorization/factorization_kernels.hpp index 02af1ed270d..f206b822dd3 100644 --- a/core/factorization/factorization_kernels.hpp +++ b/core/factorization/factorization_kernels.hpp @@ -49,34 +49,34 @@ namespace kernels { #define GKO_DECLARE_FACTORIZATION_ADD_DIAGONAL_ELEMENTS_KERNEL(ValueType, \ IndexType) \ void add_diagonal_elements(std::shared_ptr exec, \ - 
matrix::Csr *mtx, \ + matrix::Csr* mtx, \ bool is_sorted) #define GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL(ValueType, \ IndexType) \ void initialize_row_ptrs_l_u( \ std::shared_ptr exec, \ - const matrix::Csr *system_matrix, \ - IndexType *l_row_ptrs, IndexType *u_row_ptrs) + const matrix::Csr* system_matrix, \ + IndexType* l_row_ptrs, IndexType* u_row_ptrs) #define GKO_DECLARE_FACTORIZATION_INITIALIZE_L_U_KERNEL(ValueType, IndexType) \ void initialize_l_u( \ std::shared_ptr exec, \ - const matrix::Csr *system_matrix, \ - matrix::Csr *l_factor, \ - matrix::Csr *u_factor) + const matrix::Csr* system_matrix, \ + matrix::Csr* l_factor, \ + matrix::Csr* u_factor) #define GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL(ValueType, \ IndexType) \ void initialize_row_ptrs_l( \ std::shared_ptr exec, \ - const matrix::Csr *system_matrix, \ - IndexType *l_row_ptrs) + const matrix::Csr* system_matrix, \ + IndexType* l_row_ptrs) #define GKO_DECLARE_FACTORIZATION_INITIALIZE_L_KERNEL(ValueType, IndexType) \ void initialize_l(std::shared_ptr exec, \ - const matrix::Csr *system_matrix, \ - matrix::Csr *l_factor, \ + const matrix::Csr* system_matrix, \ + matrix::Csr* l_factor, \ bool diag_sqrt) diff --git a/core/factorization/ic.cpp b/core/factorization/ic.cpp index 34bbbcb048d..f57564b79a3 100644 --- a/core/factorization/ic.cpp +++ b/core/factorization/ic.cpp @@ -65,7 +65,7 @@ GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); template std::unique_ptr> Ic::generate( - const std::shared_ptr &system_matrix, bool skip_sorting, + const std::shared_ptr& system_matrix, bool skip_sorting, bool both_factors) const { GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); diff --git a/core/factorization/ic_kernels.hpp b/core/factorization/ic_kernels.hpp index 4efcfe1565a..e6626bed4e6 100644 --- a/core/factorization/ic_kernels.hpp +++ b/core/factorization/ic_kernels.hpp @@ -50,7 +50,7 @@ namespace kernels { #define GKO_DECLARE_IC_COMPUTE_KERNEL(ValueType, 
IndexType) \ void compute(std::shared_ptr exec, \ - matrix::Csr *system_matrix) + matrix::Csr* system_matrix) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/factorization/ilu.cpp b/core/factorization/ilu.cpp index 8359bf3f465..3548fc5b992 100644 --- a/core/factorization/ilu.cpp +++ b/core/factorization/ilu.cpp @@ -65,7 +65,7 @@ GKO_REGISTER_OPERATION(initialize_l_u, factorization::initialize_l_u); template std::unique_ptr> Ilu::generate_l_u( - const std::shared_ptr &system_matrix, bool skip_sorting) const + const std::shared_ptr& system_matrix, bool skip_sorting) const { GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); diff --git a/core/factorization/ilu_kernels.hpp b/core/factorization/ilu_kernels.hpp index 50ef5fa831b..d631f9e5484 100644 --- a/core/factorization/ilu_kernels.hpp +++ b/core/factorization/ilu_kernels.hpp @@ -51,7 +51,7 @@ namespace kernels { #define GKO_DECLARE_ILU_COMPUTE_LU_KERNEL(ValueType, IndexType) \ void compute_lu(std::shared_ptr exec, \ - matrix::Csr *system_matrix) + matrix::Csr* system_matrix) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/factorization/par_ic.cpp b/core/factorization/par_ic.cpp index 6266816a059..6cd422fe90f 100644 --- a/core/factorization/par_ic.cpp +++ b/core/factorization/par_ic.cpp @@ -73,7 +73,7 @@ GKO_REGISTER_OPERATION(convert_to_coo, csr::convert_to_coo); template std::unique_ptr> ParIc::generate( - const std::shared_ptr &system_matrix, bool skip_sorting, + const std::shared_ptr& system_matrix, bool skip_sorting, bool both_factors) const { using CsrMatrix = matrix::Csr; diff --git a/core/factorization/par_ic_kernels.hpp b/core/factorization/par_ic_kernels.hpp index 4611cf88fa3..332b297469b 100644 --- a/core/factorization/par_ic_kernels.hpp +++ b/core/factorization/par_ic_kernels.hpp @@ -52,13 +52,13 @@ namespace kernels { #define GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL(ValueType, IndexType) \ void init_factor(std::shared_ptr exec, \ - matrix::Csr *l_factor) + matrix::Csr* l_factor) #define 
GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL(ValueType, IndexType) \ void compute_factor( \ std::shared_ptr exec, size_type iterations, \ - const matrix::Coo *lower_system_matrix, \ - matrix::Csr *l_factor) + const matrix::Coo* lower_system_matrix, \ + matrix::Csr* l_factor) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/factorization/par_ict.cpp b/core/factorization/par_ict.cpp index 93050b5c67a..eed85dc8b89 100644 --- a/core/factorization/par_ict.cpp +++ b/core/factorization/par_ict.cpp @@ -108,7 +108,7 @@ struct ParIctState { // use the approximate selection/filter kernels? bool use_approx_select; // system matrix A - const CsrMatrix *system_matrix; + const CsrMatrix* system_matrix; // current lower factor L std::unique_ptr l; // current upper factor L^H @@ -129,7 +129,7 @@ struct ParIctState { std::shared_ptr lh_strategy; ParIctState(std::shared_ptr exec_in, - const CsrMatrix *system_matrix_in, + const CsrMatrix* system_matrix_in, std::unique_ptr l_in, IndexType l_nnz_limit, bool use_approx_select, std::shared_ptr l_strategy_, @@ -167,7 +167,7 @@ struct ParIctState { template std::unique_ptr> ParIct::generate_l_lt( - const std::shared_ptr &system_matrix) const + const std::shared_ptr& system_matrix) const { using CsrMatrix = matrix::Csr; diff --git a/core/factorization/par_ict_kernels.hpp b/core/factorization/par_ict_kernels.hpp index 04d0af80e0c..b42cd9bdc28 100644 --- a/core/factorization/par_ict_kernels.hpp +++ b/core/factorization/par_ict_kernels.hpp @@ -52,16 +52,16 @@ namespace kernels { #define GKO_DECLARE_PAR_ICT_ADD_CANDIDATES_KERNEL(ValueType, IndexType) \ void add_candidates(std::shared_ptr exec, \ - const matrix::Csr *llh, \ - const matrix::Csr *a, \ - const matrix::Csr *l, \ - matrix::Csr *l_new) + const matrix::Csr* llh, \ + const matrix::Csr* a, \ + const matrix::Csr* l, \ + matrix::Csr* l_new) #define GKO_DECLARE_PAR_ICT_COMPUTE_FACTOR_KERNEL(ValueType, IndexType) \ void compute_factor(std::shared_ptr exec, \ - const matrix::Csr 
*a, \ - matrix::Csr *l, \ - const matrix::Coo *l_coo) + const matrix::Csr* a, \ + matrix::Csr* l, \ + const matrix::Coo* l_coo) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/factorization/par_ilu.cpp b/core/factorization/par_ilu.cpp index 995902b9bb4..a887b86dbf6 100644 --- a/core/factorization/par_ilu.cpp +++ b/core/factorization/par_ilu.cpp @@ -72,7 +72,7 @@ GKO_REGISTER_OPERATION(csr_transpose, csr::transpose); template std::unique_ptr> ParIlu::generate_l_u( - const std::shared_ptr &system_matrix, bool skip_sorting, + const std::shared_ptr& system_matrix, bool skip_sorting, std::shared_ptr l_strategy, std::shared_ptr u_strategy) const { @@ -131,13 +131,13 @@ ParIlu::generate_l_u( // Since `transpose()` returns an `std::unique_ptr`, we need to // convert it to `u_matrix_type *` in order to use it. auto u_factor_transpose = - static_cast(u_factor_transpose_lin_op.get()); + static_cast(u_factor_transpose_lin_op.get()); // At first, test if the given system_matrix was already a Coo matrix, // so no conversion would be necessary. std::unique_ptr coo_system_matrix_unique_ptr{nullptr}; auto coo_system_matrix_ptr = - dynamic_cast(system_matrix.get()); + dynamic_cast(system_matrix.get()); // If it was not, and we already own a CSR `system_matrix`, // we can move the Csr matrix to Coo, which has very little overhead. 
diff --git a/core/factorization/par_ilu_kernels.hpp b/core/factorization/par_ilu_kernels.hpp index ea612d66ad9..bde424f8e07 100644 --- a/core/factorization/par_ilu_kernels.hpp +++ b/core/factorization/par_ilu_kernels.hpp @@ -51,9 +51,9 @@ namespace kernels { #define GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL(ValueType, IndexType) \ void compute_l_u_factors( \ std::shared_ptr exec, size_type iterations, \ - const matrix::Coo *system_matrix, \ - matrix::Csr *l_factor, \ - matrix::Csr *u_factor) + const matrix::Coo* system_matrix, \ + matrix::Csr* l_factor, \ + matrix::Csr* u_factor) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/factorization/par_ilut.cpp b/core/factorization/par_ilut.cpp index 3cd7f12b0f7..1f2280701ae 100644 --- a/core/factorization/par_ilut.cpp +++ b/core/factorization/par_ilut.cpp @@ -110,7 +110,7 @@ struct ParIlutState { // use the approximate selection/filter kernels? bool use_approx_select; // system matrix A - const CsrMatrix *system_matrix; + const CsrMatrix* system_matrix; // current lower factor L std::unique_ptr l; // current upper factor U @@ -139,7 +139,7 @@ struct ParIlutState { std::shared_ptr u_strategy; ParIlutState(std::shared_ptr exec_in, - const CsrMatrix *system_matrix_in, + const CsrMatrix* system_matrix_in, std::unique_ptr l_in, std::unique_ptr u_in, IndexType l_nnz_limit, IndexType u_nnz_limit, bool use_approx_select, @@ -183,7 +183,7 @@ struct ParIlutState { template std::unique_ptr> ParIlut::generate_l_u( - const std::shared_ptr &system_matrix) const + const std::shared_ptr& system_matrix) const { using CsrMatrix = matrix::Csr; @@ -299,7 +299,7 @@ void ParIlutState::iterate() auto u_filter_rank = std::max(0, u_nnz - u_nnz_limit - 1); remove_complex l_threshold{}; remove_complex u_threshold{}; - CooMatrix *null_coo = nullptr; + CooMatrix* null_coo = nullptr; if (use_approx_select) { // remove approximately smallest candidates from L' and U'^T exec->run(make_threshold_filter_approx(l_new.get(), l_filter_rank, 
diff --git a/core/factorization/par_ilut_kernels.hpp b/core/factorization/par_ilut_kernels.hpp index c484caaf81d..578325f5e94 100644 --- a/core/factorization/par_ilut_kernels.hpp +++ b/core/factorization/par_ilut_kernels.hpp @@ -52,45 +52,45 @@ namespace kernels { #define GKO_DECLARE_PAR_ILUT_ADD_CANDIDATES_KERNEL(ValueType, IndexType) \ void add_candidates(std::shared_ptr exec, \ - const matrix::Csr *lu, \ - const matrix::Csr *a, \ - const matrix::Csr *l, \ - const matrix::Csr *u, \ - matrix::Csr *l_new, \ - matrix::Csr *u_new) + const matrix::Csr* lu, \ + const matrix::Csr* a, \ + const matrix::Csr* l, \ + const matrix::Csr* u, \ + matrix::Csr* l_new, \ + matrix::Csr* u_new) #define GKO_DECLARE_PAR_ILUT_COMPUTE_LU_FACTORS_KERNEL(ValueType, IndexType) \ void compute_l_u_factors(std::shared_ptr exec, \ - const matrix::Csr *a, \ - matrix::Csr *l, \ - const matrix::Coo *l_coo, \ - matrix::Csr *u, \ - const matrix::Coo *u_coo, \ - matrix::Csr *u_csc) + const matrix::Csr* a, \ + matrix::Csr* l, \ + const matrix::Coo* l_coo, \ + matrix::Csr* u, \ + const matrix::Coo* u_coo, \ + matrix::Csr* u_csc) #define GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL(ValueType, IndexType) \ void threshold_select(std::shared_ptr exec, \ - const matrix::Csr *m, \ - IndexType rank, Array &tmp, \ - Array> &tmp2, \ - remove_complex &threshold) + const matrix::Csr* m, \ + IndexType rank, Array& tmp, \ + Array>& tmp2, \ + remove_complex& threshold) #define GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_KERNEL(ValueType, IndexType) \ void threshold_filter(std::shared_ptr exec, \ - const matrix::Csr *m, \ + const matrix::Csr* m, \ remove_complex threshold, \ - matrix::Csr *m_out, \ - matrix::Coo *m_out_coo, \ + matrix::Csr* m_out, \ + matrix::Coo* m_out_coo, \ bool lower) #define GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL(ValueType, \ IndexType) \ void threshold_filter_approx(std::shared_ptr exec, \ - const matrix::Csr *m, \ - IndexType rank, Array &tmp, \ - remove_complex &threshold, \ - 
matrix::Csr *m_out, \ - matrix::Coo *m_out_coo) + const matrix::Csr* m, \ + IndexType rank, Array& tmp, \ + remove_complex& threshold, \ + matrix::Csr* m_out, \ + matrix::Coo* m_out_coo) #define GKO_DECLARE_ALL_AS_TEMPLATES \ constexpr auto sampleselect_searchtree_height = 8; \ diff --git a/core/log/convergence.cpp b/core/log/convergence.cpp index 7db17ebb531..34d9a46c446 100644 --- a/core/log/convergence.cpp +++ b/core/log/convergence.cpp @@ -45,12 +45,12 @@ namespace log { template void Convergence::on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *implicit_sq_resnorm, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &one_changed, - const bool &stopped) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* implicit_sq_resnorm, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& one_changed, + const bool& stopped) const { if (stopped) { Array tmp(status->get_executor()->get_master(), @@ -86,11 +86,11 @@ void Convergence::on_criterion_check_completed( template void Convergence::on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &one_changed, - const bool &stopped) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& one_changed, + const bool& stopped) const { this->on_criterion_check_completed( criterion, num_iterations, residual, residual_norm, 
nullptr, solution, diff --git a/core/log/papi.cpp b/core/log/papi.cpp index bfb5a16d00b..c3d4f6c0c13 100644 --- a/core/log/papi.cpp +++ b/core/log/papi.cpp @@ -42,43 +42,43 @@ namespace log { template -void Papi::on_allocation_started(const Executor *exec, - const size_type &num_bytes) const +void Papi::on_allocation_started(const Executor* exec, + const size_type& num_bytes) const { allocation_started.get_counter(exec) += num_bytes; } template -void Papi::on_allocation_completed(const Executor *exec, - const size_type &num_bytes, - const uintptr &location) const +void Papi::on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const { allocation_completed.get_counter(exec) += num_bytes; } template -void Papi::on_free_started(const Executor *exec, - const uintptr &location) const +void Papi::on_free_started(const Executor* exec, + const uintptr& location) const { free_started.get_counter(exec) += 1; } template -void Papi::on_free_completed(const Executor *exec, - const uintptr &location) const +void Papi::on_free_completed(const Executor* exec, + const uintptr& location) const { free_completed.get_counter(exec) += 1; } template -void Papi::on_copy_started(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const +void Papi::on_copy_started(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const { copy_started_from.get_counter(from) += num_bytes; copy_started_to.get_counter(to) += num_bytes; @@ -86,11 +86,11 @@ void Papi::on_copy_started(const Executor *from, const Executor *to, template -void Papi::on_copy_completed(const Executor *from, - const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const +void Papi::on_copy_completed(const Executor* from, + const Executor* to, + const uintptr& 
location_from, + const uintptr& location_to, + const size_type& num_bytes) const { copy_completed_from.get_counter(from) += num_bytes; copy_completed_to.get_counter(to) += num_bytes; @@ -98,16 +98,16 @@ void Papi::on_copy_completed(const Executor *from, template -void Papi::on_operation_launched(const Executor *exec, - const Operation *operation) const +void Papi::on_operation_launched(const Executor* exec, + const Operation* operation) const { operation_launched.get_counter(exec) += 1; } template -void Papi::on_operation_completed(const Executor *exec, - const Operation *operation) const +void Papi::on_operation_completed(const Executor* exec, + const Operation* operation) const { operation_completed.get_counter(exec) += 1; } @@ -115,7 +115,7 @@ void Papi::on_operation_completed(const Executor *exec, template void Papi::on_polymorphic_object_create_started( - const Executor *exec, const PolymorphicObject *po) const + const Executor* exec, const PolymorphicObject* po) const { polymorphic_object_create_started.get_counter(exec) += 1; } @@ -123,8 +123,8 @@ void Papi::on_polymorphic_object_create_started( template void Papi::on_polymorphic_object_create_completed( - const Executor *exec, const PolymorphicObject *input, - const PolymorphicObject *output) const + const Executor* exec, const PolymorphicObject* input, + const PolymorphicObject* output) const { polymorphic_object_create_completed.get_counter(exec) += 1; } @@ -132,8 +132,8 @@ void Papi::on_polymorphic_object_create_completed( template void Papi::on_polymorphic_object_copy_started( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const { polymorphic_object_copy_started.get_counter(exec) += 1; } @@ -141,8 +141,8 @@ void Papi::on_polymorphic_object_copy_started( template void Papi::on_polymorphic_object_copy_completed( - const Executor *exec, const PolymorphicObject *from, - const 
PolymorphicObject *to) const + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const { polymorphic_object_copy_completed.get_counter(exec) += 1; } @@ -150,45 +150,45 @@ void Papi::on_polymorphic_object_copy_completed( template void Papi::on_polymorphic_object_deleted( - const Executor *exec, const PolymorphicObject *po) const + const Executor* exec, const PolymorphicObject* po) const { polymorphic_object_deleted.get_counter(exec) += 1; } template -void Papi::on_linop_apply_started(const LinOp *A, const LinOp *b, - const LinOp *x) const +void Papi::on_linop_apply_started(const LinOp* A, const LinOp* b, + const LinOp* x) const { linop_apply_started.get_counter(A) += 1; } template -void Papi::on_linop_apply_completed(const LinOp *A, const LinOp *b, - const LinOp *x) const +void Papi::on_linop_apply_completed(const LinOp* A, const LinOp* b, + const LinOp* x) const { linop_apply_completed.get_counter(A) += 1; } template -void Papi::on_linop_advanced_apply_started(const LinOp *A, - const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - const LinOp *x) const +void Papi::on_linop_advanced_apply_started(const LinOp* A, + const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + const LinOp* x) const { linop_advanced_apply_started.get_counter(A) += 1; } template -void Papi::on_linop_advanced_apply_completed(const LinOp *A, - const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - const LinOp *x) const +void Papi::on_linop_advanced_apply_completed(const LinOp* A, + const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + const LinOp* x) const { linop_advanced_apply_completed.get_counter(A) += 1; } @@ -196,7 +196,7 @@ void Papi::on_linop_advanced_apply_completed(const LinOp *A, template void Papi::on_linop_factory_generate_started( - const LinOpFactory *factory, const LinOp *input) const + const LinOpFactory* factory, const LinOp* input) const { linop_factory_generate_started.get_counter(factory) += 1; } @@ -204,7 +204,7 @@ 
void Papi::on_linop_factory_generate_started( template void Papi::on_linop_factory_generate_completed( - const LinOpFactory *factory, const LinOp *input, const LinOp *output) const + const LinOpFactory* factory, const LinOp* input, const LinOp* output) const { linop_factory_generate_completed.get_counter(factory) += 1; } @@ -212,11 +212,11 @@ void Papi::on_linop_factory_generate_completed( template void Papi::on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, - const uint8 &stoppingId, const bool &setFinalized, - const Array *status, const bool &oneChanged, - const bool &converged) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, + const uint8& stoppingId, const bool& setFinalized, + const Array* status, const bool& oneChanged, + const bool& converged) const { using Vector = matrix::Dense; double residual_norm_d = 0.0; @@ -234,11 +234,11 @@ void Papi::on_criterion_check_completed( } const auto tmp = reinterpret_cast(criterion); - auto &map = this->criterion_check_completed; + auto& map = this->criterion_check_completed; if (map.find(tmp) == map.end()) { map[tmp] = NULL; } - void *handle = map[tmp]; + void* handle = map[tmp]; if (!handle) { std::ostringstream oss; oss << "criterion_check_completed_" << tmp; @@ -251,11 +251,11 @@ void Papi::on_criterion_check_completed( template -void Papi::on_iteration_complete(const LinOp *solver, - const size_type &num_iterations, - const LinOp *residual, - const LinOp *solution, - const LinOp *residual_norm) const +void Papi::on_iteration_complete(const LinOp* solver, + const size_type& num_iterations, + const LinOp* residual, + const LinOp* solution, + const LinOp* residual_norm) const { this->on_iteration_complete(solver, num_iterations, residual, solution, residual_norm, nullptr); @@ -264,9 +264,9 @@ void 
Papi::on_iteration_complete(const LinOp *solver, template void Papi::on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, const LinOp *residual, - const LinOp *solution, const LinOp *residual_norm, - const LinOp *implicit_sq_residual_norm) const + const LinOp* solver, const size_type& num_iterations, const LinOp* residual, + const LinOp* solution, const LinOp* residual_norm, + const LinOp* implicit_sq_residual_norm) const { iteration_complete.get_counter(solver) = num_iterations; } diff --git a/core/log/record.cpp b/core/log/record.cpp index ba249edf3e6..363b9d7bfc3 100644 --- a/core/log/record.cpp +++ b/core/log/record.cpp @@ -42,8 +42,8 @@ namespace gko { namespace log { -void Record::on_allocation_started(const Executor *exec, - const size_type &num_bytes) const +void Record::on_allocation_started(const Executor* exec, + const size_type& num_bytes) const { append_deque(data_.allocation_started, (std::unique_ptr( @@ -51,9 +51,9 @@ void Record::on_allocation_started(const Executor *exec, } -void Record::on_allocation_completed(const Executor *exec, - const size_type &num_bytes, - const uintptr &location) const +void Record::on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const { append_deque(data_.allocation_completed, (std::unique_ptr( @@ -61,8 +61,8 @@ void Record::on_allocation_completed(const Executor *exec, } -void Record::on_free_started(const Executor *exec, - const uintptr &location) const +void Record::on_free_started(const Executor* exec, + const uintptr& location) const { append_deque( data_.free_started, @@ -70,8 +70,8 @@ void Record::on_free_started(const Executor *exec, } -void Record::on_free_completed(const Executor *exec, - const uintptr &location) const +void Record::on_free_completed(const Executor* exec, + const uintptr& location) const { append_deque( data_.free_completed, @@ -79,10 +79,10 @@ void Record::on_free_completed(const Executor *exec, } -void 
Record::on_copy_started(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const +void Record::on_copy_started(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const { using tuple = std::tuple; append_deque( @@ -92,10 +92,10 @@ void Record::on_copy_started(const Executor *from, const Executor *to, } -void Record::on_copy_completed(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const +void Record::on_copy_completed(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const { using tuple = std::tuple; append_deque( @@ -105,8 +105,8 @@ void Record::on_copy_completed(const Executor *from, const Executor *to, } -void Record::on_operation_launched(const Executor *exec, - const Operation *operation) const +void Record::on_operation_launched(const Executor* exec, + const Operation* operation) const { append_deque( data_.operation_launched, @@ -114,8 +114,8 @@ void Record::on_operation_launched(const Executor *exec, } -void Record::on_operation_completed(const Executor *exec, - const Operation *operation) const +void Record::on_operation_completed(const Executor* exec, + const Operation* operation) const { append_deque( data_.operation_completed, @@ -124,7 +124,7 @@ void Record::on_operation_completed(const Executor *exec, void Record::on_polymorphic_object_create_started( - const Executor *exec, const PolymorphicObject *po) const + const Executor* exec, const PolymorphicObject* po) const { append_deque(data_.polymorphic_object_create_started, (std::unique_ptr( @@ -133,8 +133,8 @@ void Record::on_polymorphic_object_create_started( void Record::on_polymorphic_object_create_completed( - const Executor *exec, const PolymorphicObject *input, - 
const PolymorphicObject *output) const + const Executor* exec, const PolymorphicObject* input, + const PolymorphicObject* output) const { append_deque(data_.polymorphic_object_create_completed, (std::unique_ptr( @@ -143,8 +143,8 @@ void Record::on_polymorphic_object_create_completed( void Record::on_polymorphic_object_copy_started( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const { append_deque(data_.polymorphic_object_copy_started, (std::unique_ptr( @@ -153,8 +153,8 @@ void Record::on_polymorphic_object_copy_started( void Record::on_polymorphic_object_copy_completed( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const { append_deque(data_.polymorphic_object_copy_completed, (std::unique_ptr( @@ -162,8 +162,8 @@ void Record::on_polymorphic_object_copy_completed( } -void Record::on_polymorphic_object_deleted(const Executor *exec, - const PolymorphicObject *po) const +void Record::on_polymorphic_object_deleted(const Executor* exec, + const PolymorphicObject* po) const { append_deque(data_.polymorphic_object_deleted, (std::unique_ptr( @@ -171,8 +171,8 @@ void Record::on_polymorphic_object_deleted(const Executor *exec, } -void Record::on_linop_apply_started(const LinOp *A, const LinOp *b, - const LinOp *x) const +void Record::on_linop_apply_started(const LinOp* A, const LinOp* b, + const LinOp* x) const { append_deque(data_.linop_apply_started, (std::unique_ptr( @@ -180,8 +180,8 @@ void Record::on_linop_apply_started(const LinOp *A, const LinOp *b, } -void Record::on_linop_apply_completed(const LinOp *A, const LinOp *b, - const LinOp *x) const +void Record::on_linop_apply_completed(const LinOp* A, const LinOp* b, + const LinOp* x) const { append_deque(data_.linop_apply_completed, (std::unique_ptr( @@ -189,9 
+189,9 @@ void Record::on_linop_apply_completed(const LinOp *A, const LinOp *b, } -void Record::on_linop_advanced_apply_started(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) const +void Record::on_linop_advanced_apply_started(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const { append_deque( data_.linop_advanced_apply_started, @@ -199,11 +199,11 @@ void Record::on_linop_advanced_apply_started(const LinOp *A, const LinOp *alpha, } -void Record::on_linop_advanced_apply_completed(const LinOp *A, - const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - const LinOp *x) const +void Record::on_linop_advanced_apply_completed(const LinOp* A, + const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + const LinOp* x) const { append_deque( data_.linop_advanced_apply_completed, @@ -211,8 +211,8 @@ void Record::on_linop_advanced_apply_completed(const LinOp *A, } -void Record::on_linop_factory_generate_started(const LinOpFactory *factory, - const LinOp *input) const +void Record::on_linop_factory_generate_started(const LinOpFactory* factory, + const LinOp* input) const { append_deque(data_.linop_factory_generate_started, (std::unique_ptr( @@ -220,9 +220,9 @@ void Record::on_linop_factory_generate_started(const LinOpFactory *factory, } -void Record::on_linop_factory_generate_completed(const LinOpFactory *factory, - const LinOp *input, - const LinOp *output) const +void Record::on_linop_factory_generate_completed(const LinOpFactory* factory, + const LinOp* input, + const LinOp* output) const { append_deque(data_.linop_factory_generate_completed, (std::unique_ptr( @@ -231,9 +231,9 @@ void Record::on_linop_factory_generate_completed(const LinOpFactory *factory, void Record::on_criterion_check_started( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, - const uint8 &stopping_id, const bool 
&set_finalized) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized) const { append_deque(data_.criterion_check_started, (std::unique_ptr(new criterion_data{ @@ -243,12 +243,12 @@ void Record::on_criterion_check_started( void Record::on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *implicit_residual_norm_sq, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &oneChanged, - const bool &converged) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* implicit_residual_norm_sq, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& oneChanged, + const bool& converged) const { append_deque( data_.criterion_check_completed, @@ -259,11 +259,11 @@ void Record::on_criterion_check_completed( void Record::on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &oneChanged, - const bool &converged) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& oneChanged, + const bool& converged) const { this->on_criterion_check_completed( criterion, num_iterations, residual, residual_norm, nullptr, solution, @@ -271,21 +271,21 @@ void Record::on_criterion_check_completed( } -void Record::on_iteration_complete(const LinOp 
*solver, - const size_type &num_iterations, - const LinOp *residual, const LinOp *solution, - const LinOp *residual_norm) const +void Record::on_iteration_complete(const LinOp* solver, + const size_type& num_iterations, + const LinOp* residual, const LinOp* solution, + const LinOp* residual_norm) const { this->on_iteration_complete(solver, num_iterations, residual, solution, residual_norm, nullptr); } -void Record::on_iteration_complete(const LinOp *solver, - const size_type &num_iterations, - const LinOp *residual, const LinOp *solution, - const LinOp *residual_norm, - const LinOp *implicit_sq_residual_norm) const +void Record::on_iteration_complete(const LinOp* solver, + const size_type& num_iterations, + const LinOp* residual, const LinOp* solution, + const LinOp* residual_norm, + const LinOp* implicit_sq_residual_norm) const { append_deque( data_.iteration_completed, diff --git a/core/log/stream.cpp b/core/log/stream.cpp index bc0c32a7c48..b0923052cde 100644 --- a/core/log/stream.cpp +++ b/core/log/stream.cpp @@ -52,7 +52,7 @@ namespace { template -std::ostream &operator<<(std::ostream &os, const matrix::Dense *mtx) +std::ostream& operator<<(std::ostream& os, const matrix::Dense* mtx) { auto exec = mtx->get_executor(); auto tmp = make_temporary_clone(exec->get_master(), mtx); @@ -67,7 +67,7 @@ std::ostream &operator<<(std::ostream &os, const matrix::Dense *mtx) } -std::ostream &operator<<(std::ostream &os, const stopping_status *status) +std::ostream& operator<<(std::ostream& os, const stopping_status* status) { os << "[" << std::endl; os << "\tConverged: " << status->has_converged() << std::endl; @@ -78,7 +78,7 @@ std::ostream &operator<<(std::ostream &os, const stopping_status *status) } -std::string bytes_name(const size_type &num_bytes) +std::string bytes_name(const size_type& num_bytes) { std::ostringstream oss; oss << "Bytes[" << num_bytes << "]"; @@ -86,7 +86,7 @@ std::string bytes_name(const size_type &num_bytes) } -std::string location_name(const 
uintptr &location) +std::string location_name(const uintptr& location) { std::ostringstream oss; oss << "Location[" << std::hex << "0x" << location << "]" << std::dec; @@ -95,7 +95,7 @@ std::string location_name(const uintptr &location) #define GKO_ENABLE_DEMANGLE_NAME(_object_type) \ - std::string demangle_name(const _object_type *object) \ + std::string demangle_name(const _object_type* object) \ { \ std::ostringstream oss; \ oss << #_object_type "["; \ @@ -123,8 +123,8 @@ GKO_ENABLE_DEMANGLE_NAME(Operation); template -void Stream::on_allocation_started(const Executor *exec, - const size_type &num_bytes) const +void Stream::on_allocation_started(const Executor* exec, + const size_type& num_bytes) const { os_ << prefix_ << "allocation started on " << demangle_name(exec) << " with " << bytes_name(num_bytes) << std::endl; @@ -132,9 +132,9 @@ void Stream::on_allocation_started(const Executor *exec, template -void Stream::on_allocation_completed(const Executor *exec, - const size_type &num_bytes, - const uintptr &location) const +void Stream::on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const { os_ << prefix_ << "allocation completed on " << demangle_name(exec) << " at " << location_name(location) << " with " @@ -143,8 +143,8 @@ void Stream::on_allocation_completed(const Executor *exec, template -void Stream::on_free_started(const Executor *exec, - const uintptr &location) const +void Stream::on_free_started(const Executor* exec, + const uintptr& location) const { os_ << prefix_ << "free started on " << demangle_name(exec) << " at " << location_name(location) << std::endl; @@ -152,8 +152,8 @@ void Stream::on_free_started(const Executor *exec, template -void Stream::on_free_completed(const Executor *exec, - const uintptr &location) const +void Stream::on_free_completed(const Executor* exec, + const uintptr& location) const { os_ << prefix_ << "free completed on " << demangle_name(exec) << " at " << 
location_name(location) << std::endl; @@ -161,11 +161,11 @@ void Stream::on_free_completed(const Executor *exec, template -void Stream::on_copy_started(const Executor *from, - const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const +void Stream::on_copy_started(const Executor* from, + const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const { os_ << prefix_ << "copy started from " << demangle_name(from) << " to " << demangle_name(to) << " from " << location_name(location_from) @@ -175,11 +175,11 @@ void Stream::on_copy_started(const Executor *from, template -void Stream::on_copy_completed(const Executor *from, - const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const +void Stream::on_copy_completed(const Executor* from, + const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const { os_ << prefix_ << "copy completed from " << demangle_name(from) << " to " << demangle_name(to) << " from " << location_name(location_from) @@ -189,8 +189,8 @@ void Stream::on_copy_completed(const Executor *from, template -void Stream::on_operation_launched(const Executor *exec, - const Operation *operation) const +void Stream::on_operation_launched(const Executor* exec, + const Operation* operation) const { os_ << prefix_ << demangle_name(operation) << " started on " << demangle_name(exec) << std::endl; @@ -198,8 +198,8 @@ void Stream::on_operation_launched(const Executor *exec, template -void Stream::on_operation_completed(const Executor *exec, - const Operation *operation) const +void Stream::on_operation_completed(const Executor* exec, + const Operation* operation) const { os_ << prefix_ << demangle_name(operation) << " completed on " << demangle_name(exec) << std::endl; @@ -208,7 +208,7 @@ void Stream::on_operation_completed(const Executor 
*exec, template void Stream::on_polymorphic_object_create_started( - const Executor *exec, const PolymorphicObject *po) const + const Executor* exec, const PolymorphicObject* po) const { os_ << prefix_ << "PolymorphicObject create started from " << demangle_name(po) << " on " << demangle_name(exec) << std::endl; @@ -217,8 +217,8 @@ void Stream::on_polymorphic_object_create_started( template void Stream::on_polymorphic_object_create_completed( - const Executor *exec, const PolymorphicObject *input, - const PolymorphicObject *output) const + const Executor* exec, const PolymorphicObject* input, + const PolymorphicObject* output) const { os_ << prefix_ << demangle_name(output) << " create completed from " << demangle_name(input) << " on " << demangle_name(exec) << std::endl; @@ -227,8 +227,8 @@ void Stream::on_polymorphic_object_create_completed( template void Stream::on_polymorphic_object_copy_started( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const { os_ << prefix_ << demangle_name(from) << " copy started to " << demangle_name(to) << " on " << demangle_name(exec) << std::endl; @@ -237,8 +237,8 @@ void Stream::on_polymorphic_object_copy_started( template void Stream::on_polymorphic_object_copy_completed( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const { os_ << prefix_ << demangle_name(from) << " copy completed to " << demangle_name(to) << " on " << demangle_name(exec) << std::endl; @@ -247,7 +247,7 @@ void Stream::on_polymorphic_object_copy_completed( template void Stream::on_polymorphic_object_deleted( - const Executor *exec, const PolymorphicObject *po) const + const Executor* exec, const PolymorphicObject* po) const { os_ << prefix_ << demangle_name(po) << " deleted on " << demangle_name(exec) << 
std::endl; @@ -255,8 +255,8 @@ void Stream::on_polymorphic_object_deleted( template -void Stream::on_linop_apply_started(const LinOp *A, const LinOp *b, - const LinOp *x) const +void Stream::on_linop_apply_started(const LinOp* A, const LinOp* b, + const LinOp* x) const { os_ << prefix_ << "apply started on A " << demangle_name(A) << " with b " << demangle_name(b) << " and x " << demangle_name(x) << std::endl; @@ -272,8 +272,8 @@ void Stream::on_linop_apply_started(const LinOp *A, const LinOp *b, template -void Stream::on_linop_apply_completed(const LinOp *A, const LinOp *b, - const LinOp *x) const +void Stream::on_linop_apply_completed(const LinOp* A, const LinOp* b, + const LinOp* x) const { os_ << prefix_ << "apply completed on A " << demangle_name(A) << " with b " << demangle_name(b) << " and x " << demangle_name(x) << std::endl; @@ -289,11 +289,11 @@ void Stream::on_linop_apply_completed(const LinOp *A, const LinOp *b, template -void Stream::on_linop_advanced_apply_started(const LinOp *A, - const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - const LinOp *x) const +void Stream::on_linop_advanced_apply_started(const LinOp* A, + const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + const LinOp* x) const { os_ << prefix_ << "advanced apply started on A " << demangle_name(A) << " with alpha " << demangle_name(alpha) << " b " << demangle_name(b) @@ -315,11 +315,11 @@ void Stream::on_linop_advanced_apply_started(const LinOp *A, template -void Stream::on_linop_advanced_apply_completed(const LinOp *A, - const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - const LinOp *x) const +void Stream::on_linop_advanced_apply_completed(const LinOp* A, + const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + const LinOp* x) const { os_ << prefix_ << "advanced apply completed on A " << demangle_name(A) << " with alpha " << demangle_name(alpha) << " b " << demangle_name(b) @@ -342,7 +342,7 @@ void Stream::on_linop_advanced_apply_completed(const LinOp *A, 
template void Stream::on_linop_factory_generate_started( - const LinOpFactory *factory, const LinOp *input) const + const LinOpFactory* factory, const LinOp* input) const { os_ << prefix_ << "generate started for " << demangle_name(factory) << " with input " << demangle_name(input) << std::endl; @@ -351,7 +351,7 @@ void Stream::on_linop_factory_generate_started( template void Stream::on_linop_factory_generate_completed( - const LinOpFactory *factory, const LinOp *input, const LinOp *output) const + const LinOpFactory* factory, const LinOp* input, const LinOp* output) const { os_ << prefix_ << "generate completed for " << demangle_name(factory) << " with input " << demangle_name(input) << " produced " @@ -361,9 +361,9 @@ void Stream::on_linop_factory_generate_completed( template void Stream::on_criterion_check_started( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized) const { os_ << prefix_ << "check started for " << demangle_name(criterion) << " at iteration " << num_iterations << " with ID " @@ -389,11 +389,11 @@ void Stream::on_criterion_check_started( template void Stream::on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, const LinOp *solution, - const uint8 &stoppingId, const bool &setFinalized, - const Array *status, const bool &oneChanged, - const bool &converged) const + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, const LinOp* solution, + const uint8& stoppingId, const bool& setFinalized, + const Array* status, const bool& oneChanged, 
+ const bool& converged) const { os_ << prefix_ << "check completed for " << demangle_name(criterion) << " at iteration " << num_iterations << " with ID " @@ -423,11 +423,11 @@ void Stream::on_criterion_check_completed( template -void Stream::on_iteration_complete(const LinOp *solver, - const size_type &num_iterations, - const LinOp *residual, - const LinOp *solution, - const LinOp *residual_norm) const +void Stream::on_iteration_complete(const LinOp* solver, + const size_type& num_iterations, + const LinOp* residual, + const LinOp* solution, + const LinOp* residual_norm) const { this->on_iteration_complete(solver, num_iterations, residual, solution, residual_norm, nullptr); @@ -436,9 +436,9 @@ void Stream::on_iteration_complete(const LinOp *solver, template void Stream::on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, const LinOp *residual, - const LinOp *solution, const LinOp *residual_norm, - const LinOp *implicit_sq_residual_norm) const + const LinOp* solver, const size_type& num_iterations, const LinOp* residual, + const LinOp* solution, const LinOp* residual_norm, + const LinOp* implicit_sq_residual_norm) const { os_ << prefix_ << "iteration " << num_iterations << " completed with solver " << demangle_name(solver) diff --git a/core/matrix/coo.cpp b/core/matrix/coo.cpp index 4eda43b2dd8..ce59e7fdfb8 100644 --- a/core/matrix/coo.cpp +++ b/core/matrix/coo.cpp @@ -76,7 +76,7 @@ GKO_REGISTER_OPERATION(outplace_absolute_array, template -void Coo::apply_impl(const LinOp *b, LinOp *x) const +void Coo::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -87,8 +87,8 @@ void Coo::apply_impl(const LinOp *b, LinOp *x) const template -void Coo::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Coo::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, 
auto dense_b, auto dense_beta, auto dense_x) { @@ -100,7 +100,7 @@ void Coo::apply_impl(const LinOp *alpha, const LinOp *b, template -void Coo::apply2_impl(const LinOp *b, LinOp *x) const +void Coo::apply2_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -111,8 +111,8 @@ void Coo::apply2_impl(const LinOp *b, LinOp *x) const template -void Coo::apply2_impl(const LinOp *alpha, const LinOp *b, - LinOp *x) const +void Coo::apply2_impl(const LinOp* alpha, const LinOp* b, + LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_x) { @@ -125,7 +125,7 @@ void Coo::apply2_impl(const LinOp *alpha, const LinOp *b, template void Coo::convert_to( - Coo, IndexType> *result) const + Coo, IndexType>* result) const { result->values_ = this->values_; result->row_idxs_ = this->row_idxs_; @@ -136,7 +136,7 @@ void Coo::convert_to( template void Coo::move_to( - Coo, IndexType> *result) + Coo, IndexType>* result) { this->convert_to(result); } @@ -144,7 +144,7 @@ void Coo::move_to( template void Coo::convert_to( - Csr *result) const + Csr* result) const { auto exec = this->get_executor(); auto tmp = Csr::create( @@ -159,7 +159,7 @@ void Coo::convert_to( template -void Coo::move_to(Csr *result) +void Coo::move_to(Csr* result) { auto exec = this->get_executor(); auto tmp = Csr::create( @@ -174,7 +174,7 @@ void Coo::move_to(Csr *result) template -void Coo::convert_to(Dense *result) const +void Coo::convert_to(Dense* result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -184,22 +184,22 @@ void Coo::convert_to(Dense *result) const template -void Coo::move_to(Dense *result) +void Coo::move_to(Dense* result) { this->convert_to(result); } template -void Coo::read(const mat_data &data) +void Coo::read(const mat_data& data) { size_type nnz = 0; - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { nnz += 
(elem.value != zero()); } auto tmp = Coo::create(this->get_executor()->get_master(), data.size, nnz); size_type elt = 0; - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { auto val = elem.value; if (val != zero()) { tmp->get_row_idxs()[elt] = elem.row; @@ -213,13 +213,13 @@ void Coo::read(const mat_data &data) template -void Coo::write(mat_data &data) const +void Coo::write(mat_data& data) const { std::unique_ptr op{}; - const Coo *tmp{}; + const Coo* tmp{}; if (this->get_executor()->get_master() != this->get_executor()) { op = this->clone(this->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = this; } diff --git a/core/matrix/coo_builder.hpp b/core/matrix/coo_builder.hpp index 7c4ce38cb89..ab86727e503 100644 --- a/core/matrix/coo_builder.hpp +++ b/core/matrix/coo_builder.hpp @@ -55,31 +55,31 @@ class CooBuilder { /** * Returns the row index array of the COO matrix. */ - Array &get_row_idx_array() { return matrix_->row_idxs_; } + Array& get_row_idx_array() { return matrix_->row_idxs_; } /** * Returns the column index array of the COO matrix. */ - Array &get_col_idx_array() { return matrix_->col_idxs_; } + Array& get_col_idx_array() { return matrix_->col_idxs_; } /** * Returns the value array of the COO matrix. */ - Array &get_value_array() { return matrix_->values_; } + Array& get_value_array() { return matrix_->values_; } /** * Initializes a CooBuilder from an existing COO matrix. 
*/ - explicit CooBuilder(Coo *matrix) : matrix_{matrix} {} + explicit CooBuilder(Coo* matrix) : matrix_{matrix} {} // make this type non-movable - CooBuilder(const CooBuilder &) = delete; - CooBuilder(CooBuilder &&) = delete; - CooBuilder &operator=(const CooBuilder &) = delete; - CooBuilder &operator=(CooBuilder &&) = delete; + CooBuilder(const CooBuilder&) = delete; + CooBuilder(CooBuilder&&) = delete; + CooBuilder& operator=(const CooBuilder&) = delete; + CooBuilder& operator=(CooBuilder&&) = delete; private: - Coo *matrix_; + Coo* matrix_; }; diff --git a/core/matrix/coo_kernels.hpp b/core/matrix/coo_kernels.hpp index 2f2134cf183..f63f26f3dc4 100644 --- a/core/matrix/coo_kernels.hpp +++ b/core/matrix/coo_kernels.hpp @@ -49,43 +49,43 @@ namespace kernels { #define GKO_DECLARE_COO_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ - const matrix::Coo *a, \ - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, \ + const matrix::Dense* b, matrix::Dense* c) #define GKO_DECLARE_COO_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Coo *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::Coo* a, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, \ + matrix::Dense* c) #define GKO_DECLARE_COO_SPMV2_KERNEL(ValueType, IndexType) \ void spmv2(std::shared_ptr exec, \ - const matrix::Coo *a, \ - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, \ + const matrix::Dense* b, matrix::Dense* c) #define GKO_DECLARE_COO_ADVANCED_SPMV2_KERNEL(ValueType, IndexType) \ void advanced_spmv2(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Coo *a, \ - const matrix::Dense *b, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::Coo* a, \ + const matrix::Dense* b, \ + matrix::Dense* c) #define 
GKO_DECLARE_COO_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ - const matrix::Coo *source, \ - matrix::Dense *result) + const matrix::Coo* source, \ + matrix::Dense* result) #define GKO_DECLARE_COO_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Coo *source, \ - matrix::Csr *result) + const matrix::Coo* source, \ + matrix::Csr* result) #define GKO_DECLARE_COO_EXTRACT_DIAGONAL_KERNEL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Coo *orig, \ - matrix::Diagonal *diag) + const matrix::Coo* orig, \ + matrix::Diagonal* diag) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/csr.cpp b/core/matrix/csr.cpp index 571a90ca351..055dba17e4a 100644 --- a/core/matrix/csr.cpp +++ b/core/matrix/csr.cpp @@ -96,11 +96,11 @@ GKO_REGISTER_OPERATION(outplace_absolute_array, template -void Csr::apply_impl(const LinOp *b, LinOp *x) const +void Csr::apply_impl(const LinOp* b, LinOp* x) const { using ComplexDense = Dense>; using TCsr = Csr; - if (auto b_csr = dynamic_cast(b)) { + if (auto b_csr = dynamic_cast(b)) { // if b is a CSR matrix, we compute a SpGeMM auto x_csr = as(x); this->get_executor()->run(csr::make_spgemm(this, b_csr, x_csr)); @@ -116,20 +116,20 @@ void Csr::apply_impl(const LinOp *b, LinOp *x) const template -void Csr::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Csr::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { using ComplexDense = Dense>; using RealDense = Dense>; using TCsr = Csr; - if (auto b_csr = dynamic_cast(b)) { + if (auto b_csr = dynamic_cast(b)) { // if b is a CSR matrix, we compute a SpGeMM auto x_csr = as(x); auto x_copy = x_csr->clone(); this->get_executor()->run(csr::make_advanced_spgemm( as>(alpha), this, b_csr, as>(beta), x_copy.get(), x_csr)); - } else if (dynamic_cast *>(b)) { + } else if 
(dynamic_cast*>(b)) { // if b is an identity matrix, we compute an SpGEAM auto x_csr = as(x); auto x_copy = x_csr->clone(); @@ -150,7 +150,7 @@ void Csr::apply_impl(const LinOp *alpha, const LinOp *b, template void Csr::convert_to( - Csr, IndexType> *result) const + Csr, IndexType>* result) const { result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; @@ -162,7 +162,7 @@ void Csr::convert_to( template void Csr::move_to( - Csr, IndexType> *result) + Csr, IndexType>* result) { this->convert_to(result); } @@ -170,7 +170,7 @@ void Csr::move_to( template void Csr::convert_to( - Coo *result) const + Coo* result) const { auto exec = this->get_executor(); auto tmp = Coo::create( @@ -183,14 +183,14 @@ void Csr::convert_to( template -void Csr::move_to(Coo *result) +void Csr::move_to(Coo* result) { this->convert_to(result); } template -void Csr::convert_to(Dense *result) const +void Csr::convert_to(Dense* result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -200,7 +200,7 @@ void Csr::convert_to(Dense *result) const template -void Csr::move_to(Dense *result) +void Csr::move_to(Dense* result) { this->convert_to(result); } @@ -208,7 +208,7 @@ void Csr::move_to(Dense *result) template void Csr::convert_to( - Hybrid *result) const + Hybrid* result) const { auto exec = this->get_executor(); Array row_nnz(exec, this->get_size()[0]); @@ -230,7 +230,7 @@ void Csr::convert_to( template -void Csr::move_to(Hybrid *result) +void Csr::move_to(Hybrid* result) { this->convert_to(result); } @@ -238,7 +238,7 @@ void Csr::move_to(Hybrid *result) template void Csr::convert_to( - Sellp *result) const + Sellp* result) const { auto exec = this->get_executor(); const auto stride_factor = (result->get_stride_factor() == 0) @@ -258,7 +258,7 @@ void Csr::convert_to( template -void Csr::move_to(Sellp *result) +void Csr::move_to(Sellp* result) { this->convert_to(result); } @@ -266,7 +266,7 @@ void Csr::move_to(Sellp *result) template void 
Csr::convert_to( - SparsityCsr *result) const + SparsityCsr* result) const { auto exec = this->get_executor(); auto tmp = SparsityCsr::create( @@ -284,7 +284,7 @@ void Csr::convert_to( template void Csr::move_to( - SparsityCsr *result) + SparsityCsr* result) { this->convert_to(result); } @@ -292,7 +292,7 @@ void Csr::move_to( template void Csr::convert_to( - Ell *result) const + Ell* result) const { auto exec = this->get_executor(); size_type max_nnz_per_row; @@ -305,17 +305,17 @@ void Csr::convert_to( template -void Csr::move_to(Ell *result) +void Csr::move_to(Ell* result) { this->convert_to(result); } template -void Csr::read(const mat_data &data) +void Csr::read(const mat_data& data) { size_type nnz = 0; - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { nnz += (elem.value != zero()); } auto tmp = Csr::create(this->get_executor()->get_master(), data.size, nnz, @@ -343,13 +343,13 @@ void Csr::read(const mat_data &data) template -void Csr::write(mat_data &data) const +void Csr::write(mat_data& data) const { std::unique_ptr op{}; - const Csr *tmp{}; + const Csr* tmp{}; if (this->get_executor()->get_master() != this->get_executor()) { op = this->clone(this->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = this; } @@ -398,7 +398,7 @@ std::unique_ptr Csr::conj_transpose() const template std::unique_ptr Csr::permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { GKO_ASSERT_IS_SQUARE_MATRIX(this); GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); @@ -421,7 +421,7 @@ std::unique_ptr Csr::permute( template std::unique_ptr Csr::inverse_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { GKO_ASSERT_IS_SQUARE_MATRIX(this); GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); @@ -440,7 +440,7 @@ std::unique_ptr Csr::inverse_permute( template std::unique_ptr 
Csr::row_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); @@ -458,7 +458,7 @@ std::unique_ptr Csr::row_permute( template std::unique_ptr Csr::column_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); auto exec = this->get_executor(); @@ -481,7 +481,7 @@ std::unique_ptr Csr::column_permute( template std::unique_ptr Csr::inverse_row_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); auto exec = this->get_executor(); @@ -499,7 +499,7 @@ std::unique_ptr Csr::inverse_row_permute( template std::unique_ptr Csr::inverse_column_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); auto exec = this->get_executor(); diff --git a/core/matrix/csr_builder.hpp b/core/matrix/csr_builder.hpp index 37b4ac76f24..8f2c4392247 100644 --- a/core/matrix/csr_builder.hpp +++ b/core/matrix/csr_builder.hpp @@ -55,17 +55,17 @@ class CsrBuilder { /** * Returns the column index array of the CSR matrix. */ - Array &get_col_idx_array() { return matrix_->col_idxs_; } + Array& get_col_idx_array() { return matrix_->col_idxs_; } /** * Returns the value array of the CSR matrix. */ - Array &get_value_array() { return matrix_->values_; } + Array& get_value_array() { return matrix_->values_; } /** * Initializes a CsrBuilder from an existing CSR matrix. */ - explicit CsrBuilder(Csr *matrix) : matrix_{matrix} {} + explicit CsrBuilder(Csr* matrix) : matrix_{matrix} {} /** * Updates the internal matrix data structures at destruction. 
@@ -73,13 +73,13 @@ class CsrBuilder { ~CsrBuilder() { matrix_->make_srow(); } // make this type non-movable - CsrBuilder(const CsrBuilder &) = delete; - CsrBuilder(CsrBuilder &&) = delete; - CsrBuilder &operator=(const CsrBuilder &) = delete; - CsrBuilder &operator=(CsrBuilder &&) = delete; + CsrBuilder(const CsrBuilder&) = delete; + CsrBuilder(CsrBuilder&&) = delete; + CsrBuilder& operator=(const CsrBuilder&) = delete; + CsrBuilder& operator=(CsrBuilder&&) = delete; private: - Csr *matrix_; + Csr* matrix_; }; diff --git a/core/matrix/csr_kernels.hpp b/core/matrix/csr_kernels.hpp index 89cb7fd7e7c..e0922f3ac84 100644 --- a/core/matrix/csr_kernels.hpp +++ b/core/matrix/csr_kernels.hpp @@ -54,136 +54,136 @@ namespace kernels { #define GKO_DECLARE_CSR_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ - const matrix::Csr *a, \ - const matrix::Dense *b, matrix::Dense *c) + const matrix::Csr* a, \ + const matrix::Dense* b, matrix::Dense* c) #define GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Csr *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::Csr* a, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, \ + matrix::Dense* c) #define GKO_DECLARE_CSR_SPGEMM_KERNEL(ValueType, IndexType) \ void spgemm(std::shared_ptr exec, \ - const matrix::Csr *a, \ - const matrix::Csr *b, \ - matrix::Csr *c) + const matrix::Csr* a, \ + const matrix::Csr* b, \ + matrix::Csr* c) #define GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL(ValueType, IndexType) \ void advanced_spgemm(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Csr *a, \ - const matrix::Csr *b, \ - const matrix::Dense *beta, \ - const matrix::Csr *d, \ - matrix::Csr *c) + const matrix::Dense* alpha, \ + const matrix::Csr* a, \ + const matrix::Csr* b, \ + const matrix::Dense* beta, \ + const 
matrix::Csr* d, \ + matrix::Csr* c) #define GKO_DECLARE_CSR_SPGEAM_KERNEL(ValueType, IndexType) \ void spgeam(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Csr *a, \ - const matrix::Dense *beta, \ - const matrix::Csr *b, \ - matrix::Csr *c) + const matrix::Dense* alpha, \ + const matrix::Csr* a, \ + const matrix::Dense* beta, \ + const matrix::Csr* b, \ + matrix::Csr* c) #define GKO_DECLARE_CSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ - const matrix::Csr *source, \ - matrix::Dense *result) + const matrix::Csr* source, \ + matrix::Dense* result) #define GKO_DECLARE_CSR_CONVERT_TO_COO_KERNEL(ValueType, IndexType) \ void convert_to_coo(std::shared_ptr exec, \ - const matrix::Csr *source, \ - matrix::Coo *result) + const matrix::Csr* source, \ + matrix::Coo* result) #define GKO_DECLARE_CSR_CONVERT_TO_ELL_KERNEL(ValueType, IndexType) \ void convert_to_ell(std::shared_ptr exec, \ - const matrix::Csr *source, \ - matrix::Ell *result) + const matrix::Csr* source, \ + matrix::Ell* result) #define GKO_DECLARE_CSR_CONVERT_TO_HYBRID_KERNEL(ValueType, IndexType) \ void convert_to_hybrid(std::shared_ptr exec, \ - const matrix::Csr *source, \ - matrix::Hybrid *result) + const matrix::Csr* source, \ + matrix::Hybrid* result) #define GKO_DECLARE_CSR_CONVERT_TO_SELLP_KERNEL(ValueType, IndexType) \ void convert_to_sellp(std::shared_ptr exec, \ - const matrix::Csr *source, \ - matrix::Sellp *result) + const matrix::Csr* source, \ + matrix::Sellp* result) #define GKO_DECLARE_CSR_CALCULATE_TOTAL_COLS_KERNEL(ValueType, IndexType) \ void calculate_total_cols(std::shared_ptr exec, \ - const matrix::Csr *source, \ - size_type *result, size_type stride_factor, \ + const matrix::Csr* source, \ + size_type* result, size_type stride_factor, \ size_type slice_size) #define GKO_DECLARE_CSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose(std::shared_ptr exec, \ - const matrix::Csr *orig, \ - matrix::Csr *trans) 
+ const matrix::Csr* orig, \ + matrix::Csr* trans) #define GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \ void conj_transpose(std::shared_ptr exec, \ - const matrix::Csr *orig, \ - matrix::Csr *trans) + const matrix::Csr* orig, \ + matrix::Csr* trans) #define GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL(ValueType, IndexType) \ void inv_symm_permute(std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Csr *orig, \ - matrix::Csr *permuted) + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* permuted) #define GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void row_permute(std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Csr *orig, \ - matrix::Csr *row_permuted) + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* row_permuted) #define GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_row_permute(std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Csr *orig, \ - matrix::Csr *row_permuted) + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* row_permuted) #define GKO_DECLARE_CSR_INVERSE_COLUMN_PERMUTE_KERNEL(ValueType, IndexType) \ void inverse_column_permute( \ std::shared_ptr exec, \ - const IndexType *permutation_indices, \ - const matrix::Csr *orig, \ - matrix::Csr *column_permuted) + const IndexType* permutation_indices, \ + const matrix::Csr* orig, \ + matrix::Csr* column_permuted) #define GKO_DECLARE_INVERT_PERMUTATION_KERNEL(IndexType) \ void invert_permutation( \ std::shared_ptr exec, size_type size, \ - const IndexType *permutation_indices, IndexType *inv_permutation) + const IndexType* permutation_indices, IndexType* inv_permutation) #define GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, IndexType) \ void calculate_max_nnz_per_row( \ std::shared_ptr exec, \ - const matrix::Csr *source, size_type *result) + 
const matrix::Csr* source, size_type* result) #define GKO_DECLARE_CSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, \ IndexType) \ void calculate_nonzeros_per_row( \ std::shared_ptr exec, \ - const matrix::Csr *source, \ - Array *result) + const matrix::Csr* source, \ + Array* result) #define GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ void sort_by_column_index(std::shared_ptr exec, \ - matrix::Csr *to_sort) + matrix::Csr* to_sort) #define GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ - const matrix::Csr *to_check, bool *is_sorted) + const matrix::Csr* to_check, bool* is_sorted) #define GKO_DECLARE_CSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Csr *orig, \ - matrix::Diagonal *diag) + const matrix::Csr* orig, \ + matrix::Diagonal* diag) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/dense.cpp b/core/matrix/dense.cpp index 37b82970f6d..b2d086071b2 100644 --- a/core/matrix/dense.cpp +++ b/core/matrix/dense.cpp @@ -112,8 +112,8 @@ namespace { template -inline void conversion_helper(Coo *result, - MatrixType *source, const OperationType &op) +inline void conversion_helper(Coo* result, + MatrixType* source, const OperationType& op) { auto exec = source->get_executor(); @@ -128,8 +128,8 @@ inline void conversion_helper(Coo *result, template -inline void conversion_helper(Csr *result, - MatrixType *source, const OperationType &op) +inline void conversion_helper(Csr* result, + MatrixType* source, const OperationType& op) { auto exec = source->get_executor(); @@ -144,8 +144,8 @@ inline void conversion_helper(Csr *result, template -inline void conversion_helper(Ell *result, - MatrixType *source, const OperationType &op) +inline void conversion_helper(Ell* result, + MatrixType* source, const OperationType& op) { auto exec = source->get_executor(); size_type num_stored_elements_per_row = 0; @@ 
-163,8 +163,8 @@ inline void conversion_helper(Ell *result, template -inline void conversion_helper(Hybrid *result, - MatrixType *source, const OperationType &op) +inline void conversion_helper(Hybrid* result, + MatrixType* source, const OperationType& op) { auto exec = source->get_executor(); Array row_nnz(exec, source->get_size()[0]); @@ -188,8 +188,8 @@ inline void conversion_helper(Hybrid *result, template -inline void conversion_helper(Sellp *result, - MatrixType *source, const OperationType &op) +inline void conversion_helper(Sellp* result, + MatrixType* source, const OperationType& op) { auto exec = source->get_executor(); const auto stride_factor = (result->get_stride_factor() == 0) @@ -210,8 +210,8 @@ inline void conversion_helper(Sellp *result, template -inline void conversion_helper(SparsityCsr *result, - MatrixType *source, const OperationType &op) +inline void conversion_helper(SparsityCsr* result, + MatrixType* source, const OperationType& op) { auto exec = source->get_executor(); @@ -228,7 +228,7 @@ inline void conversion_helper(SparsityCsr *result, template -void Dense::apply_impl(const LinOp *b, LinOp *x) const +void Dense::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -240,8 +240,8 @@ void Dense::apply_impl(const LinOp *b, LinOp *x) const template -void Dense::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Dense::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -260,7 +260,7 @@ void Dense::fill(const ValueType value) template -void Dense::inv_scale_impl(const LinOp *alpha) +void Dense::inv_scale_impl(const LinOp* alpha) { GKO_ASSERT_EQUAL_ROWS(alpha, dim<2>(1, 1)); if (alpha->get_size()[1] != 1) { @@ -269,12 +269,12 @@ void Dense::inv_scale_impl(const LinOp *alpha) } auto exec = 
this->get_executor(); // if alpha is real (convertible to real) and ValueType complex - if (dynamic_cast> *>(alpha) && + if (dynamic_cast>*>(alpha) && is_complex()) { // use the real-complex kernel exec->run(dense::make_inv_scale( make_temporary_conversion>(alpha).get(), - dynamic_cast(this))); + dynamic_cast(this))); // this last cast is a no-op for complex value type and the branch is // never taken for real value type } else { @@ -286,7 +286,7 @@ void Dense::inv_scale_impl(const LinOp *alpha) template -void Dense::scale_impl(const LinOp *alpha) +void Dense::scale_impl(const LinOp* alpha) { GKO_ASSERT_EQUAL_ROWS(alpha, dim<2>(1, 1)); if (alpha->get_size()[1] != 1) { @@ -295,12 +295,12 @@ void Dense::scale_impl(const LinOp *alpha) } auto exec = this->get_executor(); // if alpha is real (convertible to real) and ValueType complex - if (dynamic_cast> *>(alpha) && + if (dynamic_cast>*>(alpha) && is_complex()) { // use the real-complex kernel exec->run(dense::make_scale( make_temporary_conversion>(alpha).get(), - dynamic_cast(this))); + dynamic_cast(this))); // this last cast is a no-op for complex value type and the branch is // never taken for real value type } else { @@ -312,7 +312,7 @@ void Dense::scale_impl(const LinOp *alpha) template -void Dense::add_scaled_impl(const LinOp *alpha, const LinOp *b) +void Dense::add_scaled_impl(const LinOp* alpha, const LinOp* b) { GKO_ASSERT_EQUAL_ROWS(alpha, dim<2>(1, 1)); if (alpha->get_size()[1] != 1) { @@ -323,17 +323,17 @@ void Dense::add_scaled_impl(const LinOp *alpha, const LinOp *b) auto exec = this->get_executor(); // if alpha is real and value type complex - if (dynamic_cast> *>(alpha) && + if (dynamic_cast>*>(alpha) && is_complex()) { exec->run(dense::make_add_scaled( make_temporary_conversion>(alpha).get(), make_temporary_conversion>(b).get(), - dynamic_cast(this))); + dynamic_cast(this))); } else { - if (dynamic_cast *>(b)) { + if (dynamic_cast*>(b)) { exec->run(dense::make_add_scaled_diag( 
make_temporary_conversion(alpha).get(), - dynamic_cast *>(b), this)); + dynamic_cast*>(b), this)); } else { exec->run(dense::make_add_scaled( make_temporary_conversion(alpha).get(), @@ -344,7 +344,7 @@ void Dense::add_scaled_impl(const LinOp *alpha, const LinOp *b) template -void Dense::sub_scaled_impl(const LinOp *alpha, const LinOp *b) +void Dense::sub_scaled_impl(const LinOp* alpha, const LinOp* b) { GKO_ASSERT_EQUAL_ROWS(alpha, dim<2>(1, 1)); if (alpha->get_size()[1] != 1) { @@ -354,17 +354,17 @@ void Dense::sub_scaled_impl(const LinOp *alpha, const LinOp *b) GKO_ASSERT_EQUAL_DIMENSIONS(this, b); auto exec = this->get_executor(); - if (dynamic_cast> *>(alpha) && + if (dynamic_cast>*>(alpha) && is_complex()) { exec->run(dense::make_sub_scaled( make_temporary_conversion>(alpha).get(), make_temporary_conversion>(b).get(), - dynamic_cast(this))); + dynamic_cast(this))); } else { - if (dynamic_cast *>(b)) { + if (dynamic_cast*>(b)) { exec->run(dense::make_sub_scaled_diag( make_temporary_conversion(alpha).get(), - dynamic_cast *>(b), this)); + dynamic_cast*>(b), this)); } else { exec->run(dense::make_sub_scaled( make_temporary_conversion(alpha).get(), @@ -375,7 +375,7 @@ void Dense::sub_scaled_impl(const LinOp *alpha, const LinOp *b) template -void Dense::compute_dot_impl(const LinOp *b, LinOp *result) const +void Dense::compute_dot_impl(const LinOp* b, LinOp* result) const { GKO_ASSERT_EQUAL_DIMENSIONS(this, b); GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); @@ -387,8 +387,8 @@ void Dense::compute_dot_impl(const LinOp *b, LinOp *result) const template -void Dense::compute_conj_dot_impl(const LinOp *b, - LinOp *result) const +void Dense::compute_conj_dot_impl(const LinOp* b, + LinOp* result) const { GKO_ASSERT_EQUAL_DIMENSIONS(this, b); GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); @@ -401,7 +401,7 @@ void Dense::compute_conj_dot_impl(const LinOp *b, template -void Dense::compute_norm2_impl(LinOp *result) const +void 
Dense::compute_norm2_impl(LinOp* result) const { GKO_ASSERT_EQUAL_DIMENSIONS(result, dim<2>(1, this->get_size()[1])); auto exec = this->get_executor(); @@ -412,7 +412,7 @@ void Dense::compute_norm2_impl(LinOp *result) const template -void Dense::convert_to(Dense *result) const +void Dense::convert_to(Dense* result) const { if (this->get_size() && result->get_size() == this->get_size()) { // we need to create a executor-local clone of the target data, that @@ -436,7 +436,7 @@ void Dense::convert_to(Dense *result) const template -void Dense::move_to(Dense *result) +void Dense::move_to(Dense* result) { this->convert_to(result); } @@ -444,7 +444,7 @@ void Dense::move_to(Dense *result) template void Dense::convert_to( - Dense> *result) const + Dense>* result) const { if (result->get_size() == this->get_size()) { auto exec = this->get_executor(); @@ -459,49 +459,49 @@ void Dense::convert_to( template -void Dense::move_to(Dense> *result) +void Dense::move_to(Dense>* result) { this->convert_to(result); } template -void Dense::convert_to(Coo *result) const +void Dense::convert_to(Coo* result) const { // const ref parameters, as make_* functions take parameters by ref - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_coo(in, out); }); } template -void Dense::move_to(Coo *result) +void Dense::move_to(Coo* result) { this->convert_to(result); } template -void Dense::convert_to(Coo *result) const +void Dense::convert_to(Coo* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_coo(in, out); }); } template -void Dense::move_to(Coo *result) +void Dense::move_to(Coo* result) { this->convert_to(result); } template -void Dense::convert_to(Csr *result) const +void Dense::convert_to(Csr* result) const { - 
conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_csr(in, out); }); result->make_srow(); @@ -509,16 +509,16 @@ void Dense::convert_to(Csr *result) const template -void Dense::move_to(Csr *result) +void Dense::move_to(Csr* result) { this->convert_to(result); } template -void Dense::convert_to(Csr *result) const +void Dense::convert_to(Csr* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_csr(in, out); }); result->make_srow(); @@ -526,135 +526,135 @@ void Dense::convert_to(Csr *result) const template -void Dense::move_to(Csr *result) +void Dense::move_to(Csr* result) { this->convert_to(result); } template -void Dense::convert_to(Ell *result) const +void Dense::convert_to(Ell* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_ell(in, out); }); } template -void Dense::move_to(Ell *result) +void Dense::move_to(Ell* result) { this->convert_to(result); } template -void Dense::convert_to(Ell *result) const +void Dense::convert_to(Ell* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_ell(in, out); }); } template -void Dense::move_to(Ell *result) +void Dense::move_to(Ell* result) { this->convert_to(result); } template -void Dense::convert_to(Hybrid *result) const +void Dense::convert_to(Hybrid* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_hybrid(in, out); }); } template -void Dense::move_to(Hybrid 
*result) +void Dense::move_to(Hybrid* result) { this->convert_to(result); } template -void Dense::convert_to(Hybrid *result) const +void Dense::convert_to(Hybrid* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_hybrid(in, out); }); } template -void Dense::move_to(Hybrid *result) +void Dense::move_to(Hybrid* result) { this->convert_to(result); } template -void Dense::convert_to(Sellp *result) const +void Dense::convert_to(Sellp* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_sellp(in, out); }); } template -void Dense::move_to(Sellp *result) +void Dense::move_to(Sellp* result) { this->convert_to(result); } template -void Dense::convert_to(Sellp *result) const +void Dense::convert_to(Sellp* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_sellp(in, out); }); } template -void Dense::move_to(Sellp *result) +void Dense::move_to(Sellp* result) { this->convert_to(result); } template -void Dense::convert_to(SparsityCsr *result) const +void Dense::convert_to(SparsityCsr* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) { return dense::make_convert_to_sparsity_csr(in, out); }); } template -void Dense::move_to(SparsityCsr *result) +void Dense::move_to(SparsityCsr* result) { this->convert_to(result); } template -void Dense::convert_to(SparsityCsr *result) const +void Dense::convert_to(SparsityCsr* result) const { - conversion_helper(result, this, [](const auto &in, const auto &out) { + conversion_helper(result, this, [](const auto& in, const auto& out) 
{ return dense::make_convert_to_sparsity_csr(in, out); }); } template -void Dense::move_to(SparsityCsr *result) +void Dense::move_to(SparsityCsr* result) { this->convert_to(result); } @@ -664,7 +664,7 @@ namespace { template -inline void read_impl(MatrixType *mtx, const MatrixData &data) +inline void read_impl(MatrixType* mtx, const MatrixData& data) { auto tmp = MatrixType::create(mtx->get_executor()->get_master(), data.size); size_type ind = 0; @@ -687,14 +687,14 @@ inline void read_impl(MatrixType *mtx, const MatrixData &data) template -void Dense::read(const mat_data &data) +void Dense::read(const mat_data& data) { read_impl(this, data); } template -void Dense::read(const mat_data32 &data) +void Dense::read(const mat_data32& data) { read_impl(this, data); } @@ -704,13 +704,13 @@ namespace { template -inline void write_impl(const MatrixType *mtx, MatrixData &data) +inline void write_impl(const MatrixType* mtx, MatrixData& data) { std::unique_ptr op{}; - const MatrixType *tmp{}; + const MatrixType* tmp{}; if (mtx->get_executor()->get_master() != mtx->get_executor()) { op = mtx->clone(mtx->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = mtx; } @@ -731,14 +731,14 @@ inline void write_impl(const MatrixType *mtx, MatrixData &data) template -void Dense::write(mat_data &data) const +void Dense::write(mat_data& data) const { write_impl(this, data); } template -void Dense::write(mat_data32 &data) const +void Dense::write(mat_data32& data) const { write_impl(this, data); } @@ -765,7 +765,7 @@ std::unique_ptr Dense::conj_transpose() const template -void Dense::transpose(Dense *output) const +void Dense::transpose(Dense* output) const { GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); auto exec = this->get_executor(); @@ -775,7 +775,7 @@ void Dense::transpose(Dense *output) const template -void Dense::conj_transpose(Dense *output) const +void Dense::conj_transpose(Dense* output) const { 
GKO_ASSERT_EQUAL_DIMENSIONS(output, gko::transpose(this->get_size())); auto exec = this->get_executor(); @@ -786,8 +786,8 @@ void Dense::conj_transpose(Dense *output) const template template -void Dense::permute_impl(const Array *permutation_indices, - Dense *output) const +void Dense::permute_impl(const Array* permutation_indices, + Dense* output) const { GKO_ASSERT_IS_SQUARE_MATRIX(this); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); @@ -803,7 +803,7 @@ void Dense::permute_impl(const Array *permutation_indices, template template void Dense::inverse_permute_impl( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { GKO_ASSERT_IS_SQUARE_MATRIX(this); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); @@ -819,7 +819,7 @@ void Dense::inverse_permute_impl( template template void Dense::row_permute_impl( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); @@ -833,8 +833,8 @@ void Dense::row_permute_impl( template template -void Dense::row_gather_impl(const Array *row_indices, - Dense *row_gathered) const +void Dense::row_gather_impl(const Array* row_indices, + Dense* row_gathered) const { auto exec = this->get_executor(); dim<2> expected_dim{row_indices->get_num_elems(), this->get_size()[1]}; @@ -849,7 +849,7 @@ void Dense::row_gather_impl(const Array *row_indices, template template void Dense::column_permute_impl( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); @@ -864,7 +864,7 @@ void Dense::column_permute_impl( template template void Dense::inverse_row_permute_impl( - const Array *permutation_indices, Dense *output) const + const Array* 
permutation_indices, Dense* output) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[0]); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); @@ -879,7 +879,7 @@ void Dense::inverse_row_permute_impl( template template void Dense::inverse_column_permute_impl( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { GKO_ASSERT_EQ(permutation_indices->get_num_elems(), this->get_size()[1]); GKO_ASSERT_EQUAL_DIMENSIONS(this, output); @@ -893,7 +893,7 @@ void Dense::inverse_column_permute_impl( template std::unique_ptr Dense::permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->permute(permutation_indices, result.get()); @@ -903,7 +903,7 @@ std::unique_ptr Dense::permute( template std::unique_ptr Dense::permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->permute(permutation_indices, result.get()); @@ -912,16 +912,16 @@ std::unique_ptr Dense::permute( template -void Dense::permute(const Array *permutation_indices, - Dense *output) const +void Dense::permute(const Array* permutation_indices, + Dense* output) const { this->permute_impl(permutation_indices, output); } template -void Dense::permute(const Array *permutation_indices, - Dense *output) const +void Dense::permute(const Array* permutation_indices, + Dense* output) const { this->permute_impl(permutation_indices, output); } @@ -929,7 +929,7 @@ void Dense::permute(const Array *permutation_indices, template std::unique_ptr Dense::inverse_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->inverse_permute(permutation_indices, result.get()); @@ -939,7 +939,7 @@ std::unique_ptr 
Dense::inverse_permute( template std::unique_ptr Dense::inverse_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->inverse_permute(permutation_indices, result.get()); @@ -948,16 +948,16 @@ std::unique_ptr Dense::inverse_permute( template -void Dense::inverse_permute(const Array *permutation_indices, - Dense *output) const +void Dense::inverse_permute(const Array* permutation_indices, + Dense* output) const { this->inverse_permute_impl(permutation_indices, output); } template -void Dense::inverse_permute(const Array *permutation_indices, - Dense *output) const +void Dense::inverse_permute(const Array* permutation_indices, + Dense* output) const { this->inverse_permute_impl(permutation_indices, output); } @@ -965,7 +965,7 @@ void Dense::inverse_permute(const Array *permutation_indices, template std::unique_ptr Dense::row_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->row_permute(permutation_indices, result.get()); @@ -975,7 +975,7 @@ std::unique_ptr Dense::row_permute( template std::unique_ptr Dense::row_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->row_permute(permutation_indices, result.get()); @@ -984,16 +984,16 @@ std::unique_ptr Dense::row_permute( template -void Dense::row_permute(const Array *permutation_indices, - Dense *output) const +void Dense::row_permute(const Array* permutation_indices, + Dense* output) const { this->row_permute_impl(permutation_indices, output); } template -void Dense::row_permute(const Array *permutation_indices, - Dense *output) const +void Dense::row_permute(const Array* permutation_indices, + Dense* output) const { this->row_permute_impl(permutation_indices, output); } @@ 
-1001,7 +1001,7 @@ void Dense::row_permute(const Array *permutation_indices, template std::unique_ptr> Dense::row_gather( - const Array *row_indices) const + const Array* row_indices) const { auto exec = this->get_executor(); dim<2> out_dim{row_indices->get_num_elems(), this->get_size()[1]}; @@ -1013,7 +1013,7 @@ std::unique_ptr> Dense::row_gather( template std::unique_ptr> Dense::row_gather( - const Array *row_indices) const + const Array* row_indices) const { auto exec = this->get_executor(); dim<2> out_dim{row_indices->get_num_elems(), this->get_size()[1]}; @@ -1024,16 +1024,16 @@ std::unique_ptr> Dense::row_gather( template -void Dense::row_gather(const Array *row_indices, - Dense *row_gathered) const +void Dense::row_gather(const Array* row_indices, + Dense* row_gathered) const { this->row_gather_impl(row_indices, row_gathered); } template -void Dense::row_gather(const Array *row_indices, - Dense *row_gathered) const +void Dense::row_gather(const Array* row_indices, + Dense* row_gathered) const { this->row_gather_impl(row_indices, row_gathered); } @@ -1041,7 +1041,7 @@ void Dense::row_gather(const Array *row_indices, template std::unique_ptr Dense::column_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->column_permute(permutation_indices, result.get()); @@ -1051,7 +1051,7 @@ std::unique_ptr Dense::column_permute( template std::unique_ptr Dense::column_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->column_permute(permutation_indices, result.get()); @@ -1060,16 +1060,16 @@ std::unique_ptr Dense::column_permute( template -void Dense::column_permute(const Array *permutation_indices, - Dense *output) const +void Dense::column_permute(const Array* permutation_indices, + Dense* output) const { 
this->column_permute_impl(permutation_indices, output); } template -void Dense::column_permute(const Array *permutation_indices, - Dense *output) const +void Dense::column_permute(const Array* permutation_indices, + Dense* output) const { this->column_permute_impl(permutation_indices, output); } @@ -1077,7 +1077,7 @@ void Dense::column_permute(const Array *permutation_indices, template std::unique_ptr Dense::inverse_row_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->inverse_row_permute(permutation_indices, result.get()); @@ -1087,7 +1087,7 @@ std::unique_ptr Dense::inverse_row_permute( template std::unique_ptr Dense::inverse_row_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->inverse_row_permute(permutation_indices, result.get()); @@ -1097,7 +1097,7 @@ std::unique_ptr Dense::inverse_row_permute( template void Dense::inverse_row_permute( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { this->inverse_row_permute_impl(permutation_indices, output); } @@ -1105,7 +1105,7 @@ void Dense::inverse_row_permute( template void Dense::inverse_row_permute( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { this->inverse_row_permute_impl(permutation_indices, output); } @@ -1113,7 +1113,7 @@ void Dense::inverse_row_permute( template std::unique_ptr Dense::inverse_column_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->inverse_column_permute(permutation_indices, result.get()); @@ -1123,7 +1123,7 @@ std::unique_ptr Dense::inverse_column_permute( template std::unique_ptr 
Dense::inverse_column_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { auto result = Dense::create(this->get_executor(), this->get_size()); this->inverse_column_permute(permutation_indices, result.get()); @@ -1133,7 +1133,7 @@ std::unique_ptr Dense::inverse_column_permute( template void Dense::inverse_column_permute( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { this->inverse_column_permute_impl(permutation_indices, output); } @@ -1141,14 +1141,14 @@ void Dense::inverse_column_permute( template void Dense::inverse_column_permute( - const Array *permutation_indices, Dense *output) const + const Array* permutation_indices, Dense* output) const { this->inverse_column_permute_impl(permutation_indices, output); } template -void Dense::extract_diagonal(Diagonal *output) const +void Dense::extract_diagonal(Diagonal* output) const { auto exec = this->get_executor(); const auto diag_size = std::min(this->get_size()[0], this->get_size()[1]); @@ -1189,7 +1189,7 @@ Dense::compute_absolute() const template void Dense::compute_absolute( - Dense::absolute_type *output) const + Dense::absolute_type* output) const { GKO_ASSERT_EQUAL_DIMENSIONS(this, output); auto exec = this->get_executor(); @@ -1211,7 +1211,7 @@ Dense::make_complex() const template void Dense::make_complex( - typename Dense::complex_type *result) const + typename Dense::complex_type* result) const { GKO_ASSERT_EQUAL_DIMENSIONS(this, result); auto exec = this->get_executor(); @@ -1233,7 +1233,7 @@ Dense::get_real() const template void Dense::get_real( - typename Dense::real_type *result) const + typename Dense::real_type* result) const { GKO_ASSERT_EQUAL_DIMENSIONS(this, result); auto exec = this->get_executor(); @@ -1255,7 +1255,7 @@ Dense::get_imag() const template void Dense::get_imag( - typename Dense::real_type *result) const + typename Dense::real_type* result) const { 
GKO_ASSERT_EQUAL_DIMENSIONS(this, result); auto exec = this->get_executor(); diff --git a/core/matrix/dense_kernels.hpp b/core/matrix/dense_kernels.hpp index 64e7c27162c..bbf136f41dc 100644 --- a/core/matrix/dense_kernels.hpp +++ b/core/matrix/dense_kernels.hpp @@ -51,198 +51,198 @@ namespace kernels { #define GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL(_type) \ void simple_apply(std::shared_ptr exec, \ - const matrix::Dense<_type> *a, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *c) + const matrix::Dense<_type>* a, \ + const matrix::Dense<_type>* b, matrix::Dense<_type>* c) #define GKO_DECLARE_DENSE_APPLY_KERNEL(_type) \ void apply(std::shared_ptr exec, \ - const matrix::Dense<_type> *alpha, \ - const matrix::Dense<_type> *a, const matrix::Dense<_type> *b, \ - const matrix::Dense<_type> *beta, matrix::Dense<_type> *c) + const matrix::Dense<_type>* alpha, \ + const matrix::Dense<_type>* a, const matrix::Dense<_type>* b, \ + const matrix::Dense<_type>* beta, matrix::Dense<_type>* c) #define GKO_DECLARE_DENSE_COPY_KERNEL(_intype, _outtype) \ void copy(std::shared_ptr exec, \ - const matrix::Dense<_intype> *input, \ - matrix::Dense<_outtype> *output) + const matrix::Dense<_intype>* input, \ + matrix::Dense<_outtype>* output) #define GKO_DECLARE_DENSE_FILL_KERNEL(_type) \ void fill(std::shared_ptr exec, \ - matrix::Dense<_type> *mat, _type value) + matrix::Dense<_type>* mat, _type value) #define GKO_DECLARE_DENSE_SCALE_KERNEL(_type, _scalar_type) \ void scale(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type> *alpha, \ - matrix::Dense<_type> *x) + const matrix::Dense<_scalar_type>* alpha, \ + matrix::Dense<_type>* x) #define GKO_DECLARE_DENSE_INV_SCALE_KERNEL(_type, _scalar_type) \ void inv_scale(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type> *alpha, \ - matrix::Dense<_type> *x) + const matrix::Dense<_scalar_type>* alpha, \ + matrix::Dense<_type>* x) #define GKO_DECLARE_DENSE_ADD_SCALED_KERNEL(_type, _scalar_type) \ void 
add_scaled(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type> *alpha, \ - const matrix::Dense<_type> *x, matrix::Dense<_type> *y) + const matrix::Dense<_scalar_type>* alpha, \ + const matrix::Dense<_type>* x, matrix::Dense<_type>* y) #define GKO_DECLARE_DENSE_SUB_SCALED_KERNEL(_type, _scalar_type) \ void sub_scaled(std::shared_ptr exec, \ - const matrix::Dense<_scalar_type> *alpha, \ - const matrix::Dense<_type> *x, matrix::Dense<_type> *y) + const matrix::Dense<_scalar_type>* alpha, \ + const matrix::Dense<_type>* x, matrix::Dense<_type>* y) #define GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL(_type) \ void add_scaled_diag(std::shared_ptr exec, \ - const matrix::Dense<_type> *alpha, \ - const matrix::Diagonal<_type> *x, \ - matrix::Dense<_type> *y) + const matrix::Dense<_type>* alpha, \ + const matrix::Diagonal<_type>* x, \ + matrix::Dense<_type>* y) #define GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL(_type) \ void sub_scaled_diag(std::shared_ptr exec, \ - const matrix::Dense<_type> *alpha, \ - const matrix::Diagonal<_type> *x, \ - matrix::Dense<_type> *y) + const matrix::Dense<_type>* alpha, \ + const matrix::Diagonal<_type>* x, \ + matrix::Dense<_type>* y) #define GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL(_type) \ void compute_dot(std::shared_ptr exec, \ - const matrix::Dense<_type> *x, \ - const matrix::Dense<_type> *y, \ - matrix::Dense<_type> *result) + const matrix::Dense<_type>* x, \ + const matrix::Dense<_type>* y, \ + matrix::Dense<_type>* result) #define GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL(_type) \ void compute_conj_dot(std::shared_ptr exec, \ - const matrix::Dense<_type> *x, \ - const matrix::Dense<_type> *y, \ - matrix::Dense<_type> *result) + const matrix::Dense<_type>* x, \ + const matrix::Dense<_type>* y, \ + matrix::Dense<_type>* result) #define GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL(_type) \ void compute_norm2(std::shared_ptr exec, \ - const matrix::Dense<_type> *x, \ - matrix::Dense> *result) + const matrix::Dense<_type>* x, \ + 
matrix::Dense>* result) #define GKO_DECLARE_DENSE_CONVERT_TO_COO_KERNEL(_type, _prec) \ void convert_to_coo(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - matrix::Coo<_type, _prec> *other) + const matrix::Dense<_type>* source, \ + matrix::Coo<_type, _prec>* other) #define GKO_DECLARE_DENSE_CONVERT_TO_CSR_KERNEL(_type, _prec) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - matrix::Csr<_type, _prec> *other) + const matrix::Dense<_type>* source, \ + matrix::Csr<_type, _prec>* other) #define GKO_DECLARE_DENSE_CONVERT_TO_ELL_KERNEL(_type, _prec) \ void convert_to_ell(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - matrix::Ell<_type, _prec> *other) + const matrix::Dense<_type>* source, \ + matrix::Ell<_type, _prec>* other) #define GKO_DECLARE_DENSE_CONVERT_TO_HYBRID_KERNEL(_type, _prec) \ void convert_to_hybrid(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - matrix::Hybrid<_type, _prec> *other) + const matrix::Dense<_type>* source, \ + matrix::Hybrid<_type, _prec>* other) #define GKO_DECLARE_DENSE_CONVERT_TO_SELLP_KERNEL(_type, _prec) \ void convert_to_sellp(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - matrix::Sellp<_type, _prec> *other) + const matrix::Dense<_type>* source, \ + matrix::Sellp<_type, _prec>* other) #define GKO_DECLARE_DENSE_CONVERT_TO_SPARSITY_CSR_KERNEL(_type, _prec) \ void convert_to_sparsity_csr(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - matrix::SparsityCsr<_type, _prec> *other) + const matrix::Dense<_type>* source, \ + matrix::SparsityCsr<_type, _prec>* other) #define GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL(_type) \ void count_nonzeros(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, size_type *result) + const matrix::Dense<_type>* source, size_type* result) #define GKO_DECLARE_DENSE_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(_type) \ void calculate_max_nnz_per_row( \ std::shared_ptr exec, \ - const 
matrix::Dense<_type> *source, size_type *result) + const matrix::Dense<_type>* source, size_type* result) #define GKO_DECLARE_DENSE_CALCULATE_NONZEROS_PER_ROW_KERNEL(_type) \ void calculate_nonzeros_per_row( \ std::shared_ptr exec, \ - const matrix::Dense<_type> *source, Array *result) + const matrix::Dense<_type>* source, Array* result) #define GKO_DECLARE_DENSE_CALCULATE_TOTAL_COLS_KERNEL(_type) \ void calculate_total_cols(std::shared_ptr exec, \ - const matrix::Dense<_type> *source, \ - size_type *result, size_type stride_factor, \ + const matrix::Dense<_type>* source, \ + size_type* result, size_type stride_factor, \ size_type slice_size) #define GKO_DECLARE_DENSE_TRANSPOSE_KERNEL(_type) \ void transpose(std::shared_ptr exec, \ - const matrix::Dense<_type> *orig, \ - matrix::Dense<_type> *trans) + const matrix::Dense<_type>* orig, \ + matrix::Dense<_type>* trans) #define GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL(_type) \ void conj_transpose(std::shared_ptr exec, \ - const matrix::Dense<_type> *orig, \ - matrix::Dense<_type> *trans) + const matrix::Dense<_type>* orig, \ + matrix::Dense<_type>* trans) #define GKO_DECLARE_DENSE_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ void symm_permute(std::shared_ptr exec, \ - const Array<_itype> *permutation_indices, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Dense<_vtype> *permuted) + const Array<_itype>* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) #define GKO_DECLARE_DENSE_INV_SYMM_PERMUTE_KERNEL(_vtype, _itype) \ void inv_symm_permute(std::shared_ptr exec, \ - const Array<_itype> *permutation_indices, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Dense<_vtype> *permuted) + const Array<_itype>* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* permuted) #define GKO_DECLARE_DENSE_ROW_GATHER_KERNEL(_vtype, _itype) \ void row_gather(std::shared_ptr exec, \ - const Array<_itype> *gather_indices, \ - const matrix::Dense<_vtype> *orig, 
\ - matrix::Dense<_vtype> *row_gathered) + const Array<_itype>* gather_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* row_gathered) #define GKO_DECLARE_DENSE_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ void column_permute(std::shared_ptr exec, \ - const Array<_itype> *permutation_indices, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Dense<_vtype> *column_permuted) + const Array<_itype>* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* column_permuted) #define GKO_DECLARE_DENSE_INV_ROW_PERMUTE_KERNEL(_vtype, _itype) \ void inverse_row_permute(std::shared_ptr exec, \ - const Array<_itype> *permutation_indices, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Dense<_vtype> *row_permuted) + const Array<_itype>* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* row_permuted) #define GKO_DECLARE_DENSE_INV_COLUMN_PERMUTE_KERNEL(_vtype, _itype) \ void inverse_column_permute(std::shared_ptr exec, \ - const Array<_itype> *permutation_indices, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Dense<_vtype> *column_permuted) + const Array<_itype>* permutation_indices, \ + const matrix::Dense<_vtype>* orig, \ + matrix::Dense<_vtype>* column_permuted) #define GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL(_vtype) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Dense<_vtype> *orig, \ - matrix::Diagonal<_vtype> *diag) + const matrix::Dense<_vtype>* orig, \ + matrix::Diagonal<_vtype>* diag) #define GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL(_vtype) \ void inplace_absolute_dense(std::shared_ptr exec, \ - matrix::Dense<_vtype> *source) + matrix::Dense<_vtype>* source) #define GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL(_vtype) \ void outplace_absolute_dense( \ std::shared_ptr exec, \ - const matrix::Dense<_vtype> *source, \ - matrix::Dense> *result) + const matrix::Dense<_vtype>* source, \ + matrix::Dense>* result) #define GKO_DECLARE_MAKE_COMPLEX_KERNEL(_vtype) \ void 
make_complex(std::shared_ptr exec, \ - const matrix::Dense<_vtype> *source, \ - matrix::Dense> *result) + const matrix::Dense<_vtype>* source, \ + matrix::Dense>* result) #define GKO_DECLARE_GET_REAL_KERNEL(_vtype) \ void get_real(std::shared_ptr exec, \ - const matrix::Dense<_vtype> *source, \ - matrix::Dense> *result) + const matrix::Dense<_vtype>* source, \ + matrix::Dense>* result) #define GKO_DECLARE_GET_IMAG_KERNEL(_vtype) \ void get_imag(std::shared_ptr exec, \ - const matrix::Dense<_vtype> *source, \ - matrix::Dense> *result) + const matrix::Dense<_vtype>* source, \ + matrix::Dense>* result) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/matrix/diagonal.cpp b/core/matrix/diagonal.cpp index 9f158bc1650..04e501c69ac 100644 --- a/core/matrix/diagonal.cpp +++ b/core/matrix/diagonal.cpp @@ -66,16 +66,16 @@ GKO_REGISTER_OPERATION(outplace_absolute_array, template -void Diagonal::apply_impl(const LinOp *b, LinOp *x) const +void Diagonal::apply_impl(const LinOp* b, LinOp* x) const { auto exec = this->get_executor(); - if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { + if (dynamic_cast*>(b) && + dynamic_cast*>(x)) { exec->run(diagonal::make_apply_to_csr( this, as>(b), as>(x))); - } else if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { + } else if (dynamic_cast*>(b) && + dynamic_cast*>(x)) { exec->run(diagonal::make_apply_to_csr( this, as>(b), as>(x))); } else { @@ -90,16 +90,16 @@ void Diagonal::apply_impl(const LinOp *b, LinOp *x) const template -void Diagonal::rapply_impl(const LinOp *b, LinOp *x) const +void Diagonal::rapply_impl(const LinOp* b, LinOp* x) const { auto exec = this->get_executor(); - if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { + if (dynamic_cast*>(b) && + dynamic_cast*>(x)) { exec->run(diagonal::make_right_apply_to_csr( this, as>(b), as>(x))); - } else if (dynamic_cast *>(b) && - dynamic_cast *>(x)) { + } else if (dynamic_cast*>(b) && + dynamic_cast*>(x)) { exec->run(diagonal::make_right_apply_to_csr( this, as>(b), as>(x))); } 
else { @@ -116,8 +116,8 @@ void Diagonal::rapply_impl(const LinOp *b, LinOp *x) const template -void Diagonal::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Diagonal::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -150,7 +150,7 @@ std::unique_ptr Diagonal::conj_transpose() const template void Diagonal::convert_to( - Diagonal> *result) const + Diagonal>* result) const { result->values_ = this->values_; result->set_size(this->get_size()); @@ -158,14 +158,14 @@ void Diagonal::convert_to( template -void Diagonal::move_to(Diagonal> *result) +void Diagonal::move_to(Diagonal>* result) { this->convert_to(result); } template -void Diagonal::convert_to(Csr *result) const +void Diagonal::convert_to(Csr* result) const { auto exec = this->get_executor(); auto tmp = Csr::create( @@ -176,14 +176,14 @@ void Diagonal::convert_to(Csr *result) const template -void Diagonal::move_to(Csr *result) +void Diagonal::move_to(Csr* result) { this->convert_to(result); } template -void Diagonal::convert_to(Csr *result) const +void Diagonal::convert_to(Csr* result) const { auto exec = this->get_executor(); auto tmp = Csr::create( @@ -194,7 +194,7 @@ void Diagonal::convert_to(Csr *result) const template -void Diagonal::move_to(Csr *result) +void Diagonal::move_to(Csr* result) { this->convert_to(result); } @@ -204,7 +204,7 @@ namespace { template -inline void read_impl(MatrixType *mtx, const MatrixData &data) +inline void read_impl(MatrixType* mtx, const MatrixData& data) { // Diagonal matrices are assumed to be square. 
GKO_ASSERT_EQ(data.size[0], data.size[1]); @@ -235,14 +235,14 @@ inline void read_impl(MatrixType *mtx, const MatrixData &data) template -void Diagonal::read(const mat_data &data) +void Diagonal::read(const mat_data& data) { read_impl(this, data); } template -void Diagonal::read(const mat_data32 &data) +void Diagonal::read(const mat_data32& data) { read_impl(this, data); } @@ -252,13 +252,13 @@ namespace { template -inline void write_impl(const MatrixType *mtx, MatrixData &data) +inline void write_impl(const MatrixType* mtx, MatrixData& data) { std::unique_ptr op{}; - const MatrixType *tmp{}; + const MatrixType* tmp{}; if (mtx->get_executor()->get_master() != mtx->get_executor()) { op = mtx->clone(mtx->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = mtx; } @@ -276,14 +276,14 @@ inline void write_impl(const MatrixType *mtx, MatrixData &data) template -void Diagonal::write(mat_data &data) const +void Diagonal::write(mat_data& data) const { write_impl(this, data); } template -void Diagonal::write(mat_data32 &data) const +void Diagonal::write(mat_data32& data) const { write_impl(this, data); } diff --git a/core/matrix/diagonal_kernels.hpp b/core/matrix/diagonal_kernels.hpp index 3db4c42478b..c31f2f61b8d 100644 --- a/core/matrix/diagonal_kernels.hpp +++ b/core/matrix/diagonal_kernels.hpp @@ -48,41 +48,41 @@ namespace kernels { #define GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL(value_type) \ void apply_to_dense(std::shared_ptr exec, \ - const matrix::Diagonal *a, \ - const matrix::Dense *b, \ - matrix::Dense *c) + const matrix::Diagonal* a, \ + const matrix::Dense* b, \ + matrix::Dense* c) #define GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_DENSE_KERNEL(value_type) \ void right_apply_to_dense(std::shared_ptr exec, \ - const matrix::Diagonal *a, \ - const matrix::Dense *b, \ - matrix::Dense *c) + const matrix::Diagonal* a, \ + const matrix::Dense* b, \ + matrix::Dense* c) #define 
GKO_DECLARE_DIAGONAL_APPLY_TO_CSR_KERNEL(value_type, index_type) \ void apply_to_csr(std::shared_ptr exec, \ - const matrix::Diagonal *a, \ - const matrix::Csr *b, \ - matrix::Csr *c) + const matrix::Diagonal* a, \ + const matrix::Csr* b, \ + matrix::Csr* c) #define GKO_DECLARE_DIAGONAL_RIGHT_APPLY_TO_CSR_KERNEL(value_type, index_type) \ void right_apply_to_csr(std::shared_ptr exec, \ - const matrix::Diagonal *a, \ - const matrix::Csr *b, \ - matrix::Csr *c) + const matrix::Diagonal* a, \ + const matrix::Csr* b, \ + matrix::Csr* c) #define GKO_DECLARE_DIAGONAL_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Diagonal *source, \ - matrix::Csr *result) + const matrix::Diagonal* source, \ + matrix::Csr* result) #define GKO_DECLARE_DIAGONAL_CONJ_TRANSPOSE_KERNEL(ValueType) \ void conj_transpose(std::shared_ptr exec, \ - const matrix::Diagonal *orig, \ - matrix::Diagonal *trans) + const matrix::Diagonal* orig, \ + matrix::Diagonal* trans) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/ell.cpp b/core/matrix/ell.cpp index 5cd026c6310..9a144338154 100644 --- a/core/matrix/ell.cpp +++ b/core/matrix/ell.cpp @@ -80,12 +80,12 @@ namespace { template size_type calculate_max_nnz_per_row( - const matrix_data &data) + const matrix_data& data) { size_type nnz = 0; IndexType current_row = 0; size_type num_stored_elements_per_row = 0; - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { if (elem.row != current_row) { current_row = elem.row; num_stored_elements_per_row = @@ -102,7 +102,7 @@ size_type calculate_max_nnz_per_row( template -void Ell::apply_impl(const LinOp *b, LinOp *x) const +void Ell::apply_impl(const LinOp* b, LinOp* x) const { mixed_precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -113,8 +113,8 @@ void Ell::apply_impl(const LinOp *b, LinOp *x) const template -void Ell::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp 
*beta, LinOp *x) const +void Ell::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { mixed_precision_dispatch_real_complex( [this, alpha, beta](auto dense_b, auto dense_x) { @@ -130,7 +130,7 @@ void Ell::apply_impl(const LinOp *alpha, const LinOp *b, template void Ell::convert_to( - Ell, IndexType> *result) const + Ell, IndexType>* result) const { result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; @@ -142,14 +142,14 @@ void Ell::convert_to( template void Ell::move_to( - Ell, IndexType> *result) + Ell, IndexType>* result) { this->convert_to(result); } template -void Ell::convert_to(Dense *result) const +void Ell::convert_to(Dense* result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -159,7 +159,7 @@ void Ell::convert_to(Dense *result) const template -void Ell::move_to(Dense *result) +void Ell::move_to(Dense* result) { this->convert_to(result); } @@ -167,7 +167,7 @@ void Ell::move_to(Dense *result) template void Ell::convert_to( - Csr *result) const + Csr* result) const { auto exec = this->get_executor(); @@ -184,14 +184,14 @@ void Ell::convert_to( template -void Ell::move_to(Csr *result) +void Ell::move_to(Csr* result) { this->convert_to(result); } template -void Ell::read(const mat_data &data) +void Ell::read(const mat_data& data) { // Get the number of stored elements of every row. 
auto num_stored_elements_per_row = calculate_max_nnz_per_row(data); @@ -228,13 +228,13 @@ void Ell::read(const mat_data &data) template -void Ell::write(mat_data &data) const +void Ell::write(mat_data& data) const { std::unique_ptr op{}; - const Ell *tmp{}; + const Ell* tmp{}; if (this->get_executor()->get_master() != this->get_executor()) { op = this->clone(this->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = this; } diff --git a/core/matrix/ell_kernels.hpp b/core/matrix/ell_kernels.hpp index 7cbe54b3720..8c25f9cb18f 100644 --- a/core/matrix/ell_kernels.hpp +++ b/core/matrix/ell_kernels.hpp @@ -49,45 +49,45 @@ namespace kernels { #define GKO_DECLARE_ELL_SPMV_KERNEL(InputValueType, MatrixValueType, \ OutputValueType, IndexType) \ void spmv(std::shared_ptr exec, \ - const matrix::Ell *a, \ - const matrix::Dense *b, \ - matrix::Dense *c) + const matrix::Ell* a, \ + const matrix::Dense* b, \ + matrix::Dense* c) #define GKO_DECLARE_ELL_ADVANCED_SPMV_KERNEL(InputValueType, MatrixValueType, \ OutputValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Ell *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::Ell* a, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, \ + matrix::Dense* c) #define GKO_DECLARE_ELL_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ - const matrix::Ell *source, \ - matrix::Dense *result) + const matrix::Ell* source, \ + matrix::Dense* result) #define GKO_DECLARE_ELL_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Ell *source, \ - matrix::Csr *result) + const matrix::Ell* source, \ + matrix::Csr* result) #define GKO_DECLARE_ELL_COUNT_NONZEROS_KERNEL(ValueType, IndexType) \ void count_nonzeros(std::shared_ptr exec, \ - const matrix::Ell 
*source, \ - size_type *result) + const matrix::Ell* source, \ + size_type* result) #define GKO_DECLARE_ELL_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, \ IndexType) \ void calculate_nonzeros_per_row( \ std::shared_ptr exec, \ - const matrix::Ell *source, \ - Array *result) + const matrix::Ell* source, \ + Array* result) #define GKO_DECLARE_ELL_EXTRACT_DIAGONAL_KERNEL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Ell *orig, \ - matrix::Diagonal *diag) + const matrix::Ell* orig, \ + matrix::Diagonal* diag) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template -void Fbcsr::apply_impl(const LinOp *const b, - LinOp *const x) const +void Fbcsr::apply_impl(const LinOp* const b, + LinOp* const x) const { using Dense = Dense; - if (auto b_fbcsr = dynamic_cast *>(b)) { + if (auto b_fbcsr = dynamic_cast*>(b)) { // if b is a FBCSR matrix, we need an SpGeMM GKO_NOT_SUPPORTED(b_fbcsr); } else { @@ -168,16 +168,16 @@ void Fbcsr::apply_impl(const LinOp *const b, template -void Fbcsr::apply_impl(const LinOp *const alpha, - const LinOp *const b, - const LinOp *const beta, - LinOp *const x) const +void Fbcsr::apply_impl(const LinOp* const alpha, + const LinOp* const b, + const LinOp* const beta, + LinOp* const x) const { using Dense = Dense; - if (auto b_fbcsr = dynamic_cast *>(b)) { + if (auto b_fbcsr = dynamic_cast*>(b)) { // if b is a FBCSR matrix, we need an SpGeMM GKO_NOT_SUPPORTED(b_fbcsr); - } else if (auto b_ident = dynamic_cast *>(b)) { + } else if (auto b_ident = dynamic_cast*>(b)) { // if b is an identity matrix, we need an SpGEAM GKO_NOT_SUPPORTED(b_ident); } else { @@ -191,7 +191,7 @@ void Fbcsr::apply_impl(const LinOp *const alpha, template void Fbcsr::convert_to( - Fbcsr, IndexType> *const result) const + Fbcsr, IndexType>* const result) const { result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; @@ -204,7 +204,7 @@ void Fbcsr::convert_to( template void Fbcsr::move_to( - Fbcsr, IndexType> *const result) + Fbcsr, 
IndexType>* const result) { this->convert_to(result); } @@ -212,7 +212,7 @@ void Fbcsr::move_to( template void Fbcsr::convert_to( - Dense *const result) const + Dense* const result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -222,7 +222,7 @@ void Fbcsr::convert_to( template -void Fbcsr::move_to(Dense *const result) +void Fbcsr::move_to(Dense* const result) { this->convert_to(result); } @@ -230,7 +230,7 @@ void Fbcsr::move_to(Dense *const result) template void Fbcsr::convert_to( - Csr *const result) const + Csr* const result) const { auto exec = this->get_executor(); auto tmp = Csr::create( @@ -243,7 +243,7 @@ void Fbcsr::convert_to( template void Fbcsr::move_to( - Csr *const result) + Csr* const result) { this->convert_to(result); } @@ -251,7 +251,7 @@ void Fbcsr::move_to( template void Fbcsr::convert_to( - SparsityCsr *const result) const + SparsityCsr* const result) const { auto exec = this->get_executor(); auto tmp = SparsityCsr::create( @@ -269,7 +269,7 @@ void Fbcsr::convert_to( template void Fbcsr::move_to( - SparsityCsr *const result) + SparsityCsr* const result) { this->convert_to(result); } @@ -281,7 +281,7 @@ void Fbcsr::move_to( * @note Can this be changed to a parallel O(nnz) implementation? 
*/ template -void Fbcsr::read(const mat_data &data) +void Fbcsr::read(const mat_data& data) { GKO_ENSURE_IN_BOUNDS(data.nonzeros.size(), std::numeric_limits::max()); @@ -297,7 +297,7 @@ void Fbcsr::read(const mat_data &data) }; struct FbLess { - bool operator()(const FbEntry &a, const FbEntry &b) const + bool operator()(const FbEntry& a, const FbEntry& b) const { if (a.block_row != b.block_row) return a.block_row < b.block_row; @@ -306,7 +306,7 @@ void Fbcsr::read(const mat_data &data) } }; - auto create_block_map = [nnz, bs](const mat_data &mdata) { + auto create_block_map = [nnz, bs](const mat_data& mdata) { std::map blocks; for (index_type inz = 0; inz < nnz; inz++) { const index_type row = mdata.nonzeros[inz].row; @@ -318,7 +318,7 @@ void Fbcsr::read(const mat_data &data) const index_type blockrow = row / bs; const index_type blockcol = col / bs; - Block_t &nnzblk = blocks[{blockrow, blockcol}]; + Block_t& nnzblk = blocks[{blockrow, blockcol}]; if (nnzblk.size() == 0) { nnzblk.resize(bs, bs); nnzblk.zero(); @@ -383,13 +383,13 @@ void Fbcsr::read(const mat_data &data) template -void Fbcsr::write(mat_data &data) const +void Fbcsr::write(mat_data& data) const { std::unique_ptr op{}; - const Fbcsr *tmp{}; + const Fbcsr* tmp{}; if (this->get_executor()->get_master() != this->get_executor()) { op = this->clone(this->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = this; } diff --git a/core/matrix/fbcsr_builder.hpp b/core/matrix/fbcsr_builder.hpp index df10c2a3a57..e5deb5167bc 100644 --- a/core/matrix/fbcsr_builder.hpp +++ b/core/matrix/fbcsr_builder.hpp @@ -55,12 +55,12 @@ class FbcsrBuilder { /** * @return The column index array of the matrix. */ - Array &get_col_idx_array() { return matrix_->col_idxs_; } + Array& get_col_idx_array() { return matrix_->col_idxs_; } /** * @return The value array of the matrix. 
*/ - Array &get_value_array() { return matrix_->values_; } + Array& get_value_array() { return matrix_->values_; } /** * @return The (uniform) block size @@ -71,20 +71,20 @@ class FbcsrBuilder { * @param matrix An existing FBCSR matrix * for which intrusive access is needed */ - explicit FbcsrBuilder(Fbcsr *const matrix) + explicit FbcsrBuilder(Fbcsr* const matrix) : matrix_{matrix} {} ~FbcsrBuilder() = default; // make this type non-movable - FbcsrBuilder(const FbcsrBuilder &) = delete; - FbcsrBuilder(FbcsrBuilder &&) = delete; - FbcsrBuilder &operator=(const FbcsrBuilder &) = delete; - FbcsrBuilder &operator=(FbcsrBuilder &&) = delete; + FbcsrBuilder(const FbcsrBuilder&) = delete; + FbcsrBuilder(FbcsrBuilder&&) = delete; + FbcsrBuilder& operator=(const FbcsrBuilder&) = delete; + FbcsrBuilder& operator=(FbcsrBuilder&&) = delete; private: - Fbcsr *matrix_; + Fbcsr* matrix_; }; diff --git a/core/matrix/fbcsr_kernels.hpp b/core/matrix/fbcsr_kernels.hpp index 5d2492a3b37..90d40276948 100644 --- a/core/matrix/fbcsr_kernels.hpp +++ b/core/matrix/fbcsr_kernels.hpp @@ -51,63 +51,63 @@ namespace kernels { #define GKO_DECLARE_FBCSR_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ - const matrix::Fbcsr *a, \ - const matrix::Dense *b, matrix::Dense *c) + const matrix::Fbcsr* a, \ + const matrix::Dense* b, matrix::Dense* c) #define GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::Fbcsr *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::Fbcsr* a, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, \ + matrix::Dense* c) #define GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - matrix::Dense *result) + const matrix::Fbcsr* source, \ + matrix::Dense* result) #define 
GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - matrix::Csr *result) + const matrix::Fbcsr* source, \ + matrix::Csr* result) #define GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose(std::shared_ptr exec, \ - const matrix::Fbcsr *orig, \ - matrix::Fbcsr *trans) + const matrix::Fbcsr* orig, \ + matrix::Fbcsr* trans) #define GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \ void conj_transpose(std::shared_ptr exec, \ - const matrix::Fbcsr *orig, \ - matrix::Fbcsr *trans) + const matrix::Fbcsr* orig, \ + matrix::Fbcsr* trans) #define GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL(ValueType, \ IndexType) \ void calculate_max_nnz_per_row( \ std::shared_ptr exec, \ - const matrix::Fbcsr *source, size_type *result) + const matrix::Fbcsr* source, size_type* result) #define GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL(ValueType, \ IndexType) \ void calculate_nonzeros_per_row( \ std::shared_ptr exec, \ - const matrix::Fbcsr *source, \ - Array *result) + const matrix::Fbcsr* source, \ + Array* result) #define GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ void sort_by_column_index(std::shared_ptr exec, \ - matrix::Fbcsr *to_sort) + matrix::Fbcsr* to_sort) #define GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ - const matrix::Fbcsr *to_check, bool *is_sorted) + const matrix::Fbcsr* to_check, bool* is_sorted) #define GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Fbcsr *orig, \ - matrix::Diagonal *diag) + const matrix::Fbcsr* orig, \ + matrix::Diagonal* diag) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/hybrid.cpp b/core/matrix/hybrid.cpp index 7ad40dfe529..0e9686a987e 100644 --- a/core/matrix/hybrid.cpp +++ b/core/matrix/hybrid.cpp @@ -77,8 
+77,8 @@ namespace { template -void get_each_row_nnz(const matrix_data &data, - Array &row_nnz) +void get_each_row_nnz(const matrix_data& data, + Array& row_nnz) { size_type nnz = 0; IndexType current_row = 0; @@ -86,7 +86,7 @@ void get_each_row_nnz(const matrix_data &data, for (size_type i = 0; i < row_nnz.get_num_elems(); i++) { row_nnz_val[i] = zero(); } - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { if (elem.row != current_row) { row_nnz_val[current_row] = nnz; current_row = elem.row; @@ -102,7 +102,7 @@ void get_each_row_nnz(const matrix_data &data, template -void Hybrid::apply_impl(const LinOp *b, LinOp *x) const +void Hybrid::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -116,9 +116,9 @@ void Hybrid::apply_impl(const LinOp *b, LinOp *x) const template -void Hybrid::apply_impl(const LinOp *alpha, - const LinOp *b, const LinOp *beta, - LinOp *x) const +void Hybrid::apply_impl(const LinOp* alpha, + const LinOp* b, const LinOp* beta, + LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -133,7 +133,7 @@ void Hybrid::apply_impl(const LinOp *alpha, template void Hybrid::convert_to( - Hybrid, IndexType> *result) const + Hybrid, IndexType>* result) const { this->ell_->convert_to(result->ell_.get()); this->coo_->convert_to(result->coo_.get()); @@ -146,14 +146,14 @@ void Hybrid::convert_to( template void Hybrid::move_to( - Hybrid, IndexType> *result) + Hybrid, IndexType>* result) { this->convert_to(result); } template -void Hybrid::convert_to(Dense *result) const +void Hybrid::convert_to(Dense* result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -163,7 +163,7 @@ void Hybrid::convert_to(Dense *result) const template -void Hybrid::move_to(Dense *result) +void Hybrid::move_to(Dense* result) { this->convert_to(result); } @@ -171,7 +171,7 @@ 
void Hybrid::move_to(Dense *result) template void Hybrid::convert_to( - Csr *result) const + Csr* result) const { auto exec = this->get_executor(); @@ -188,14 +188,14 @@ void Hybrid::convert_to( template -void Hybrid::move_to(Csr *result) +void Hybrid::move_to(Csr* result) { this->convert_to(result); } template -void Hybrid::read(const mat_data &data) +void Hybrid::read(const mat_data& data) { // get the limitation of columns of the ell part // calculate coo storage @@ -253,7 +253,7 @@ void Hybrid::read(const mat_data &data) template -void Hybrid::write(mat_data &data) const +void Hybrid::write(mat_data& data) const { std::unique_ptr op{}; auto tmp_clone = diff --git a/core/matrix/hybrid_kernels.hpp b/core/matrix/hybrid_kernels.hpp index b0222cba0bd..de01017631b 100644 --- a/core/matrix/hybrid_kernels.hpp +++ b/core/matrix/hybrid_kernels.hpp @@ -46,18 +46,18 @@ namespace kernels { #define GKO_DECLARE_HYBRID_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ - const matrix::Hybrid *source, \ - matrix::Dense *result) + const matrix::Hybrid* source, \ + matrix::Dense* result) #define GKO_DECLARE_HYBRID_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Hybrid *source, \ - matrix::Csr *result) + const matrix::Hybrid* source, \ + matrix::Csr* result) #define GKO_DECLARE_HYBRID_COUNT_NONZEROS_KERNEL(ValueType, IndexType) \ void count_nonzeros(std::shared_ptr exec, \ - const matrix::Hybrid *source, \ - size_type *result) + const matrix::Hybrid* source, \ + size_type* result) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/identity.cpp b/core/matrix/identity.cpp index 169a4f4ba68..dad4bef49cb 100644 --- a/core/matrix/identity.cpp +++ b/core/matrix/identity.cpp @@ -44,15 +44,15 @@ namespace matrix { template -void Identity::apply_impl(const LinOp *b, LinOp *x) const +void Identity::apply_impl(const LinOp* b, LinOp* x) const { x->copy_from(b); } 
template -void Identity::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Identity::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/matrix/sellp.cpp b/core/matrix/sellp.cpp index d51b0fb04c7..7ab997738fd 100644 --- a/core/matrix/sellp.cpp +++ b/core/matrix/sellp.cpp @@ -75,16 +75,16 @@ namespace { template -size_type calculate_total_cols(const matrix_data &data, +size_type calculate_total_cols(const matrix_data& data, const size_type slice_size, const size_type stride_factor, - vector &slice_lengths) + vector& slice_lengths) { size_type nonzeros_per_row = 0; IndexType current_row = 0; IndexType current_slice = 0; size_type total_cols = 0; - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { if (elem.row != current_row) { current_row = elem.row; slice_lengths[current_slice] = @@ -113,7 +113,7 @@ size_type calculate_total_cols(const matrix_data &data, template -void Sellp::apply_impl(const LinOp *b, LinOp *x) const +void Sellp::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -124,8 +124,8 @@ void Sellp::apply_impl(const LinOp *b, LinOp *x) const template -void Sellp::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Sellp::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -138,7 +138,7 @@ void Sellp::apply_impl(const LinOp *alpha, const LinOp *b, template void Sellp::convert_to( - Sellp, IndexType> *result) const + Sellp, IndexType>* result) const { result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; @@ -153,14 +153,14 @@ void Sellp::convert_to( template void 
Sellp::move_to( - Sellp, IndexType> *result) + Sellp, IndexType>* result) { this->convert_to(result); } template -void Sellp::convert_to(Dense *result) const +void Sellp::convert_to(Dense* result) const { auto exec = this->get_executor(); auto tmp = Dense::create(exec, this->get_size()); @@ -170,7 +170,7 @@ void Sellp::convert_to(Dense *result) const template -void Sellp::move_to(Dense *result) +void Sellp::move_to(Dense* result) { this->convert_to(result); } @@ -178,7 +178,7 @@ void Sellp::move_to(Dense *result) template void Sellp::convert_to( - Csr *result) const + Csr* result) const { auto exec = this->get_executor(); @@ -193,14 +193,14 @@ void Sellp::convert_to( template -void Sellp::move_to(Csr *result) +void Sellp::move_to(Csr* result) { this->convert_to(result); } template -void Sellp::read(const mat_data &data) +void Sellp::read(const mat_data& data) { // Make sure that slice_size and stride factor are not zero. auto slice_size = (this->get_slice_size() == 0) ? default_slice_size @@ -264,13 +264,13 @@ void Sellp::read(const mat_data &data) template -void Sellp::write(mat_data &data) const +void Sellp::write(mat_data& data) const { std::unique_ptr op{}; - const Sellp *tmp{}; + const Sellp* tmp{}; if (this->get_executor()->get_master() != this->get_executor()) { op = this->clone(this->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = this; } diff --git a/core/matrix/sellp_kernels.hpp b/core/matrix/sellp_kernels.hpp index a2ea6f87682..fd3f369694c 100644 --- a/core/matrix/sellp_kernels.hpp +++ b/core/matrix/sellp_kernels.hpp @@ -48,36 +48,36 @@ namespace kernels { #define GKO_DECLARE_SELLP_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ - const matrix::Sellp *a, \ - const matrix::Dense *b, matrix::Dense *c) + const matrix::Sellp* a, \ + const matrix::Dense* b, matrix::Dense* c) #define GKO_DECLARE_SELLP_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr 
exec, \ - const matrix::Dense *alpha, \ - const matrix::Sellp *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::Sellp* a, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, \ + matrix::Dense* c) #define GKO_DECLARE_SELLP_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense(std::shared_ptr exec, \ - const matrix::Sellp *source, \ - matrix::Dense *result) + const matrix::Sellp* source, \ + matrix::Dense* result) #define GKO_DECLARE_SELLP_CONVERT_TO_CSR_KERNEL(ValueType, IndexType) \ void convert_to_csr(std::shared_ptr exec, \ - const matrix::Sellp *source, \ - matrix::Csr *result) + const matrix::Sellp* source, \ + matrix::Csr* result) #define GKO_DECLARE_SELLP_COUNT_NONZEROS_KERNEL(ValueType, IndexType) \ void count_nonzeros(std::shared_ptr exec, \ - const matrix::Sellp *source, \ - size_type *result) + const matrix::Sellp* source, \ + size_type* result) #define GKO_DECLARE_SELLP_EXTRACT_DIAGONAL_KERNEL(ValueType, IndexType) \ void extract_diagonal(std::shared_ptr exec, \ - const matrix::Sellp *orig, \ - matrix::Diagonal *diag) + const matrix::Sellp* orig, \ + matrix::Diagonal* diag) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/sparsity_csr.cpp b/core/matrix/sparsity_csr.cpp index e5a45557934..e4991afcf52 100644 --- a/core/matrix/sparsity_csr.cpp +++ b/core/matrix/sparsity_csr.cpp @@ -68,8 +68,8 @@ GKO_REGISTER_OPERATION(is_sorted_by_column_index, template -void SparsityCsr::apply_impl(const LinOp *b, - LinOp *x) const +void SparsityCsr::apply_impl(const LinOp* b, + LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -81,10 +81,10 @@ void SparsityCsr::apply_impl(const LinOp *b, template -void SparsityCsr::apply_impl(const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - LinOp *x) const +void SparsityCsr::apply_impl(const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + LinOp* x) 
const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -96,10 +96,10 @@ void SparsityCsr::apply_impl(const LinOp *alpha, template -void SparsityCsr::read(const mat_data &data) +void SparsityCsr::read(const mat_data& data) { size_type nnz = 0; - for (const auto &elem : data.nonzeros) { + for (const auto& elem : data.nonzeros) { nnz += (elem.value != zero()); } auto tmp = @@ -126,13 +126,13 @@ void SparsityCsr::read(const mat_data &data) template -void SparsityCsr::write(mat_data &data) const +void SparsityCsr::write(mat_data& data) const { std::unique_ptr op{}; - const SparsityCsr *tmp{}; + const SparsityCsr* tmp{}; if (this->get_executor()->get_master() != this->get_executor()) { op = this->clone(this->get_executor()->get_master()); - tmp = static_cast(op.get()); + tmp = static_cast(op.get()); } else { tmp = this; } diff --git a/core/matrix/sparsity_csr_kernels.hpp b/core/matrix/sparsity_csr_kernels.hpp index a9d17cdd133..e4ebcee8564 100644 --- a/core/matrix/sparsity_csr_kernels.hpp +++ b/core/matrix/sparsity_csr_kernels.hpp @@ -47,47 +47,47 @@ namespace kernels { #define GKO_DECLARE_SPARSITY_CSR_SPMV_KERNEL(ValueType, IndexType) \ void spmv(std::shared_ptr exec, \ - const matrix::SparsityCsr *a, \ - const matrix::Dense *b, matrix::Dense *c) + const matrix::SparsityCsr* a, \ + const matrix::Dense* b, matrix::Dense* c) #define GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL(ValueType, IndexType) \ void advanced_spmv(std::shared_ptr exec, \ - const matrix::Dense *alpha, \ - const matrix::SparsityCsr *a, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, \ - matrix::Dense *c) + const matrix::Dense* alpha, \ + const matrix::SparsityCsr* a, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, \ + matrix::Dense* c) #define GKO_DECLARE_SPARSITY_CSR_REMOVE_DIAGONAL_ELEMENTS_KERNEL(ValueType, \ IndexType) \ void remove_diagonal_elements( \ std::shared_ptr exec, \ - const IndexType *row_ptrs, const 
IndexType *col_idxs, \ - matrix::SparsityCsr *matrix) + const IndexType* row_ptrs, const IndexType* col_idxs, \ + matrix::SparsityCsr* matrix) #define GKO_DECLARE_SPARSITY_CSR_COUNT_NUM_DIAGONAL_ELEMENTS_KERNEL(ValueType, \ IndexType) \ void count_num_diagonal_elements( \ std::shared_ptr exec, \ - const matrix::SparsityCsr *matrix, \ - size_type *num_diagonal_elements) + const matrix::SparsityCsr* matrix, \ + size_type* num_diagonal_elements) #define GKO_DECLARE_SPARSITY_CSR_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose(std::shared_ptr exec, \ - const matrix::SparsityCsr *orig, \ - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, \ + matrix::SparsityCsr* trans) #define GKO_DECLARE_SPARSITY_CSR_SORT_BY_COLUMN_INDEX(ValueType, IndexType) \ void sort_by_column_index( \ std::shared_ptr exec, \ - matrix::SparsityCsr *to_sort) + matrix::SparsityCsr* to_sort) #define GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX(ValueType, \ IndexType) \ void is_sorted_by_column_index( \ std::shared_ptr exec, \ - const matrix::SparsityCsr *to_check, \ - bool *is_sorted) + const matrix::SparsityCsr* to_check, \ + bool* is_sorted) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/multigrid/amgx_pgm.cpp b/core/multigrid/amgx_pgm.cpp index ff4e2423636..84333694edf 100644 --- a/core/multigrid/amgx_pgm.cpp +++ b/core/multigrid/amgx_pgm.cpp @@ -81,10 +81,10 @@ void AmgxPgm::generate() Array intermediate_agg(this->get_executor(), parameters_.deterministic * num_rows); // Only support csr matrix currently. - const matrix_type *amgxpgm_op = nullptr; + const matrix_type* amgxpgm_op = nullptr; // Store the csr matrix if needed auto amgxpgm_op_unique_ptr = matrix_type::create(exec); - amgxpgm_op = dynamic_cast(system_matrix_.get()); + amgxpgm_op = dynamic_cast(system_matrix_.get()); if (!amgxpgm_op) { // if original matrix is not csr, converting it to csr. 
as>(this->system_matrix_.get()) diff --git a/core/multigrid/amgx_pgm_kernels.hpp b/core/multigrid/amgx_pgm_kernels.hpp index 793780ae505..ab02b7a87d1 100644 --- a/core/multigrid/amgx_pgm_kernels.hpp +++ b/core/multigrid/amgx_pgm_kernels.hpp @@ -53,30 +53,30 @@ namespace amgx_pgm { #define GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL(IndexType) \ void match_edge(std::shared_ptr exec, \ - const Array &strongest_neighbor, \ - Array &agg) + const Array& strongest_neighbor, \ + Array& agg) #define GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL(IndexType) \ void count_unagg(std::shared_ptr exec, \ - const Array &agg, IndexType *num_unagg) + const Array& agg, IndexType* num_unagg) #define GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL(IndexType) \ void renumber(std::shared_ptr exec, \ - Array &agg, IndexType *num_agg) + Array& agg, IndexType* num_agg) #define GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR(ValueType, IndexType) \ void find_strongest_neighbor( \ std::shared_ptr exec, \ - const matrix::Csr *weight_mtx, \ - const matrix::Diagonal *diag, Array &agg, \ - Array &strongest_neighbor) + const matrix::Csr* weight_mtx, \ + const matrix::Diagonal* diag, Array& agg, \ + Array& strongest_neighbor) #define GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG(ValueType, IndexType) \ void assign_to_exist_agg( \ std::shared_ptr exec, \ - const matrix::Csr *weight_mtx, \ - const matrix::Diagonal *diag, Array &agg, \ - Array &intermediate_agg) + const matrix::Csr* weight_mtx, \ + const matrix::Diagonal* diag, Array& agg, \ + Array& intermediate_agg) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/preconditioner/isai.cpp b/core/preconditioner/isai.cpp index 08f5508f395..ca0f837f71f 100644 --- a/core/preconditioner/isai.cpp +++ b/core/preconditioner/isai.cpp @@ -86,7 +86,7 @@ GKO_REGISTER_OPERATION(initialize_l, factorization::initialize_l); * If `power` is 1, the matrix will be returned unchanged. 
*/ template -std::shared_ptr extend_sparsity(std::shared_ptr &exec, +std::shared_ptr extend_sparsity(std::shared_ptr& exec, std::shared_ptr mtx, int power) { GKO_ASSERT_EQ(power >= 1, true); diff --git a/core/preconditioner/isai_kernels.hpp b/core/preconditioner/isai_kernels.hpp index bdf6360f1f4..aa9f8f561be 100644 --- a/core/preconditioner/isai_kernels.hpp +++ b/core/preconditioner/isai_kernels.hpp @@ -46,40 +46,40 @@ namespace kernels { #define GKO_DECLARE_ISAI_GENERATE_TRI_INVERSE_KERNEL(ValueType, IndexType) \ void generate_tri_inverse(std::shared_ptr exec, \ - const matrix::Csr *input, \ - matrix::Csr *inverse, \ - IndexType *excess_rhs_ptrs, \ - IndexType *excess_nz_ptrs, bool lower) + const matrix::Csr* input, \ + matrix::Csr* inverse, \ + IndexType* excess_rhs_ptrs, \ + IndexType* excess_nz_ptrs, bool lower) #define GKO_DECLARE_ISAI_GENERATE_GENERAL_INVERSE_KERNEL(ValueType, IndexType) \ void generate_general_inverse( \ std::shared_ptr exec, \ - const matrix::Csr *input, \ - matrix::Csr *inverse, \ - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, bool spd) + const matrix::Csr* input, \ + matrix::Csr* inverse, \ + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, bool spd) #define GKO_DECLARE_ISAI_GENERATE_EXCESS_SYSTEM_KERNEL(ValueType, IndexType) \ void generate_excess_system( \ std::shared_ptr exec, \ - const matrix::Csr *input, \ - const matrix::Csr *inverse, \ - const IndexType *excess_rhs_ptrs, const IndexType *excess_nz_ptrs, \ - matrix::Csr *excess_system, \ - matrix::Dense *excess_rhs, size_type e_start, \ + const matrix::Csr* input, \ + const matrix::Csr* inverse, \ + const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, \ + matrix::Csr* excess_system, \ + matrix::Dense* excess_rhs, size_type e_start, \ size_type e_end) #define GKO_DECLARE_ISAI_SCALE_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ void scale_excess_solution(std::shared_ptr exec, \ - const IndexType *excess_block_ptrs, \ - matrix::Dense *excess_solution, \ 
+ const IndexType* excess_block_ptrs, \ + matrix::Dense* excess_solution, \ size_type e_start, size_type e_end) #define GKO_DECLARE_ISAI_SCATTER_EXCESS_SOLUTION_KERNEL(ValueType, IndexType) \ void scatter_excess_solution( \ std::shared_ptr exec, \ - const IndexType *excess_rhs_ptrs, \ - const matrix::Dense *excess_solution, \ - matrix::Csr *inverse, size_type e_start, \ + const IndexType* excess_rhs_ptrs, \ + const matrix::Dense* excess_solution, \ + matrix::Csr* inverse, size_type e_start, \ size_type e_end) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/preconditioner/jacobi.cpp b/core/preconditioner/jacobi.cpp index d908a7e0ac3..eeb44e6b57b 100644 --- a/core/preconditioner/jacobi.cpp +++ b/core/preconditioner/jacobi.cpp @@ -79,7 +79,7 @@ GKO_REGISTER_OPERATION(initialize_precisions, jacobi::initialize_precisions); template -void Jacobi::apply_impl(const LinOp *b, LinOp *x) const +void Jacobi::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -98,9 +98,9 @@ void Jacobi::apply_impl(const LinOp *b, LinOp *x) const template -void Jacobi::apply_impl(const LinOp *alpha, - const LinOp *b, const LinOp *beta, - LinOp *x) const +void Jacobi::apply_impl(const LinOp* alpha, + const LinOp* b, const LinOp* beta, + LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -121,7 +121,7 @@ void Jacobi::apply_impl(const LinOp *alpha, template void Jacobi::convert_to( - matrix::Dense *result) const + matrix::Dense* result) const { auto exec = this->get_executor(); auto tmp = matrix::Dense::create(exec, this->get_size()); @@ -138,14 +138,14 @@ void Jacobi::convert_to( template -void Jacobi::move_to(matrix::Dense *result) +void Jacobi::move_to(matrix::Dense* result) { this->convert_to(result); // no special optimization possible here } template -void Jacobi::write(mat_data &data) const +void Jacobi::write(mat_data& data) const { auto 
local_clone = make_temporary_clone(this->get_executor()->get_master(), this); @@ -173,7 +173,7 @@ void Jacobi::write(mat_data &data) const precisions ? precisions[block] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION(ValueType, prec, { const auto block_data = - reinterpret_cast(group_data) + + reinterpret_cast(group_data) + scheme.get_block_offset(block); for (IndexType row = 0; row < block_size; ++row) { for (IndexType col = 0; col < block_size; ++col) { @@ -244,7 +244,7 @@ std::unique_ptr Jacobi::conj_transpose() const template void Jacobi::detect_blocks( - const matrix::Csr *system_matrix) + const matrix::Csr* system_matrix) { parameters_.block_pointers.resize_and_reset(system_matrix->get_size()[0] + 1); @@ -257,7 +257,7 @@ void Jacobi::detect_blocks( template -void Jacobi::generate(const LinOp *system_matrix, +void Jacobi::generate(const LinOp* system_matrix, bool skip_sorting) { GKO_ASSERT_IS_SQUARE_MATRIX(system_matrix); @@ -289,7 +289,7 @@ void Jacobi::generate(const LinOp *system_matrix, const auto all_block_opt = parameters_.storage_optimization.of_all_blocks; - auto &precisions = parameters_.storage_optimization.block_wise; + auto& precisions = parameters_.storage_optimization.block_wise; // if adaptive version is used, make sure that the precision array is of // the correct size by replicating it multiple times if needed if (parameters_.storage_optimization.is_block_wise || diff --git a/core/preconditioner/jacobi_kernels.hpp b/core/preconditioner/jacobi_kernels.hpp index 7745438abda..4725f146843 100644 --- a/core/preconditioner/jacobi_kernels.hpp +++ b/core/preconditioner/jacobi_kernels.hpp @@ -46,109 +46,109 @@ namespace kernels { #define GKO_DECLARE_JACOBI_FIND_BLOCKS_KERNEL(ValueType, IndexType) \ void find_blocks(std::shared_ptr exec, \ - const matrix::Csr *system_matrix, \ - uint32 max_block_size, size_type &num_blocks, \ - Array &block_pointers) - -#define GKO_DECLARE_JACOBI_GENERATE_KERNEL(ValueType, IndexType) \ - void 
generate( \ - std::shared_ptr exec, \ - const matrix::Csr *system_matrix, \ - size_type num_blocks, uint32 max_block_size, \ - remove_complex accuracy, \ - const preconditioner::block_interleaved_storage_scheme \ - &storage_scheme, \ - Array> &conditioning, \ - Array &block_precisions, \ - const Array &block_pointers, Array &blocks) + const matrix::Csr* system_matrix, \ + uint32 max_block_size, size_type& num_blocks, \ + Array& block_pointers) + +#define GKO_DECLARE_JACOBI_GENERATE_KERNEL(ValueType, IndexType) \ + void generate( \ + std::shared_ptr exec, \ + const matrix::Csr* system_matrix, \ + size_type num_blocks, uint32 max_block_size, \ + remove_complex accuracy, \ + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + Array>& conditioning, \ + Array& block_precisions, \ + const Array& block_pointers, Array& blocks) #define GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL(ValueType) \ void scalar_conj(std::shared_ptr exec, \ - const Array &diag, \ - Array &conj_diag) + const Array& diag, \ + Array& conj_diag) #define GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL(ValueType) \ void invert_diagonal(std::shared_ptr exec, \ - const Array &diag, \ - Array &inv_diag) + const Array& diag, \ + Array& inv_diag) #define GKO_DECLARE_JACOBI_APPLY_KERNEL(ValueType, IndexType) \ void apply( \ std::shared_ptr exec, size_type num_blocks, \ uint32 max_block_size, \ - const preconditioner::block_interleaved_storage_scheme \ - &storage_scheme, \ - const Array &block_precisions, \ - const Array &block_pointers, \ - const Array &blocks, const matrix::Dense *alpha, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + const Array& block_precisions, \ + const Array& block_pointers, \ + const Array& blocks, const matrix::Dense* alpha, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, matrix::Dense* x) #define 
GKO_DECLARE_JACOBI_SIMPLE_SCALAR_APPLY_KERNEL(ValueType) \ void simple_scalar_apply(std::shared_ptr exec, \ - const Array &diag, \ - const matrix::Dense *b, \ - matrix::Dense *x) + const Array& diag, \ + const matrix::Dense* b, \ + matrix::Dense* x) #define GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL(ValueType, IndexType) \ void simple_apply( \ std::shared_ptr exec, size_type num_blocks, \ uint32 max_block_size, \ - const preconditioner::block_interleaved_storage_scheme \ - &storage_scheme, \ - const Array &block_precisions, \ - const Array &block_pointers, \ - const Array &blocks, const matrix::Dense *b, \ - matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + const Array& block_precisions, \ + const Array& block_pointers, \ + const Array& blocks, const matrix::Dense* b, \ + matrix::Dense* x) #define GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL(ValueType) \ void scalar_apply( \ std::shared_ptr exec, \ - const Array &diag, const matrix::Dense *alpha, \ - const matrix::Dense *b, \ - const matrix::Dense *beta, matrix::Dense *x) + const Array& diag, const matrix::Dense* alpha, \ + const matrix::Dense* b, \ + const matrix::Dense* beta, matrix::Dense* x) #define GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL(ValueType, IndexType) \ void transpose_jacobi( \ std::shared_ptr exec, size_type num_blocks, \ uint32 max_block_size, \ - const Array &block_precisions, \ - const Array &block_pointers, \ - const Array &blocks, \ - const preconditioner::block_interleaved_storage_scheme \ - &storage_scheme, \ - Array &out_blocks) + const Array& block_precisions, \ + const Array& block_pointers, \ + const Array& blocks, \ + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + Array& out_blocks) #define GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL(ValueType, IndexType) \ void conj_transpose_jacobi( \ std::shared_ptr exec, size_type num_blocks, \ uint32 max_block_size, \ - const Array &block_precisions, \ - const Array &block_pointers, \ - 
const Array &blocks, \ - const preconditioner::block_interleaved_storage_scheme \ - &storage_scheme, \ - Array &out_blocks) + const Array& block_precisions, \ + const Array& block_pointers, \ + const Array& blocks, \ + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + Array& out_blocks) #define GKO_DECLARE_JACOBI_SCALAR_CONVERT_TO_DENSE_KERNEL(ValueType) \ void scalar_convert_to_dense(std::shared_ptr exec, \ - const Array &blocks, \ - matrix::Dense *result) + const Array& blocks, \ + matrix::Dense* result) #define GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL(ValueType, IndexType) \ void convert_to_dense( \ std::shared_ptr exec, size_type num_blocks, \ - const Array &block_precisions, \ - const Array &block_pointers, \ - const Array &blocks, \ - const preconditioner::block_interleaved_storage_scheme \ - &storage_scheme, \ - ValueType *result_values, size_type result_stride) + const Array& block_precisions, \ + const Array& block_pointers, \ + const Array& blocks, \ + const preconditioner::block_interleaved_storage_scheme& \ + storage_scheme, \ + ValueType* result_values, size_type result_stride) #define GKO_DECLARE_JACOBI_INITIALIZE_PRECISIONS_KERNEL() \ void initialize_precisions(std::shared_ptr exec, \ - const Array &source, \ - Array &precisions) + const Array& source, \ + Array& precisions) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/preconditioner/jacobi_utils.hpp b/core/preconditioner/jacobi_utils.hpp index baa7fda04b6..dc2509ce940 100644 --- a/core/preconditioner/jacobi_utils.hpp +++ b/core/preconditioner/jacobi_utils.hpp @@ -87,7 +87,7 @@ struct precision_reduction_descriptor { }; static constexpr GKO_ATTRIBUTES uint32 - singleton(const precision_reduction &pr) + singleton(const precision_reduction& pr) { return pr == precision_reduction(0, 0) ? 
p0n0 diff --git a/core/reorder/rcm.cpp b/core/reorder/rcm.cpp index 7224bad4ef3..af3d18d3b4d 100644 --- a/core/reorder/rcm.cpp +++ b/core/reorder/rcm.cpp @@ -67,7 +67,7 @@ GKO_REGISTER_OPERATION(get_degree_of_nodes, rcm::get_degree_of_nodes); template void Rcm::generate( - std::shared_ptr &exec, + std::shared_ptr& exec, std::unique_ptr adjacency_matrix) const { const IndexType num_rows = adjacency_matrix->get_size()[0]; diff --git a/core/reorder/rcm_kernels.hpp b/core/reorder/rcm_kernels.hpp index d72e8c0160a..2b1575af732 100644 --- a/core/reorder/rcm_kernels.hpp +++ b/core/reorder/rcm_kernels.hpp @@ -54,15 +54,15 @@ namespace kernels { #define GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL(IndexType) \ void get_permutation(std::shared_ptr exec, \ - IndexType num_vertices, const IndexType *row_ptrs, \ - const IndexType *col_idxs, const IndexType *degrees, \ - IndexType *permutation, IndexType *inv_permutation, \ + IndexType num_vertices, const IndexType* row_ptrs, \ + const IndexType* col_idxs, const IndexType* degrees, \ + IndexType* permutation, IndexType* inv_permutation, \ gko::reorder::starting_strategy strategy) #define GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL(IndexType) \ void get_degree_of_nodes(std::shared_ptr exec, \ IndexType num_vertices, \ - const IndexType *row_ptrs, IndexType *degrees) + const IndexType* row_ptrs, IndexType* degrees) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/solver/bicg.cpp b/core/solver/bicg.cpp index 0f67a4f1ce1..1890568af2b 100644 --- a/core/solver/bicg.cpp +++ b/core/solver/bicg.cpp @@ -96,10 +96,10 @@ std::unique_ptr Bicg::conj_transpose() const * before (conjugate-)transposing it */ template -std::unique_ptr conj_transpose_with_csr(const LinOp *mtx) +std::unique_ptr conj_transpose_with_csr(const LinOp* mtx) { auto csr_matrix_unique_ptr = copy_and_convert_to( - mtx->get_executor(), const_cast(mtx)); + mtx->get_executor(), const_cast(mtx)); csr_matrix_unique_ptr->set_strategy( std::make_shared()); @@ -109,7 
+109,7 @@ std::unique_ptr conj_transpose_with_csr(const LinOp *mtx) template -void Bicg::apply_impl(const LinOp *b, LinOp *x) const +void Bicg::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -120,8 +120,8 @@ void Bicg::apply_impl(const LinOp *b, LinOp *x) const template -void Bicg::apply_dense_impl(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Bicg::apply_dense_impl(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -162,7 +162,7 @@ void Bicg::apply_dense_impl(const matrix::Dense *dense_b, std::unique_ptr conj_trans_A; auto conj_transposable_system_matrix = - dynamic_cast(system_matrix_.get()); + dynamic_cast(system_matrix_.get()); if (conj_transposable_system_matrix) { conj_trans_A = conj_transposable_system_matrix->conj_transpose(); @@ -172,7 +172,7 @@ void Bicg::apply_dense_impl(const matrix::Dense *dense_b, using Csr32 = matrix::Csr; using Csr64 = matrix::Csr; auto supports_int64 = - dynamic_cast *>(system_matrix_.get()); + dynamic_cast*>(system_matrix_.get()); if (supports_int64) { conj_trans_A = conj_transpose_with_csr(system_matrix_.get()); } else { @@ -191,7 +191,7 @@ void Bicg::apply_dense_impl(const matrix::Dense *dense_b, // r2 = r auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, r.get()); int iter = -1; @@ -243,8 +243,8 @@ void Bicg::apply_dense_impl(const matrix::Dense *dense_b, template -void Bicg::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Bicg::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/bicg_kernels.hpp b/core/solver/bicg_kernels.hpp index 
87bac70421c..c2f113e49a2 100644 --- a/core/solver/bicg_kernels.hpp +++ b/core/solver/bicg_kernels.hpp @@ -51,32 +51,32 @@ namespace bicg { #define GKO_DECLARE_BICG_INITIALIZE_KERNEL(_type) \ void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *r, \ - matrix::Dense<_type> *z, matrix::Dense<_type> *p, \ - matrix::Dense<_type> *q, matrix::Dense<_type> *prev_rho, \ - matrix::Dense<_type> *rho, matrix::Dense<_type> *r2, \ - matrix::Dense<_type> *z2, matrix::Dense<_type> *p2, \ - matrix::Dense<_type> *q2, \ - Array *stop_status) + const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* z, matrix::Dense<_type>* p, \ + matrix::Dense<_type>* q, matrix::Dense<_type>* prev_rho, \ + matrix::Dense<_type>* rho, matrix::Dense<_type>* r2, \ + matrix::Dense<_type>* z2, matrix::Dense<_type>* p2, \ + matrix::Dense<_type>* q2, \ + Array* stop_status) #define GKO_DECLARE_BICG_STEP_1_KERNEL(_type) \ void step_1(std::shared_ptr exec, \ - matrix::Dense<_type> *p, const matrix::Dense<_type> *z, \ - matrix::Dense<_type> *p2, const matrix::Dense<_type> *z2, \ - const matrix::Dense<_type> *rho, \ - const matrix::Dense<_type> *prev_rho, \ - const Array *stop_status) + matrix::Dense<_type>* p, const matrix::Dense<_type>* z, \ + matrix::Dense<_type>* p2, const matrix::Dense<_type>* z2, \ + const matrix::Dense<_type>* rho, \ + const matrix::Dense<_type>* prev_rho, \ + const Array* stop_status) #define GKO_DECLARE_BICG_STEP_2_KERNEL(_type) \ void step_2(std::shared_ptr exec, \ - matrix::Dense<_type> *x, matrix::Dense<_type> *r, \ - matrix::Dense<_type> *r2, const matrix::Dense<_type> *p, \ - const matrix::Dense<_type> *q, const matrix::Dense<_type> *q2, \ - const matrix::Dense<_type> *beta, \ - const matrix::Dense<_type> *rho, \ - const Array *stop_status) + matrix::Dense<_type>* x, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* r2, const matrix::Dense<_type>* p, \ + const matrix::Dense<_type>* q, const matrix::Dense<_type>* 
q2, \ + const matrix::Dense<_type>* beta, \ + const matrix::Dense<_type>* rho, \ + const Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/bicgstab.cpp b/core/solver/bicgstab.cpp index cf583b5dbd6..68636756b88 100644 --- a/core/solver/bicgstab.cpp +++ b/core/solver/bicgstab.cpp @@ -88,7 +88,7 @@ std::unique_ptr Bicgstab::conj_transpose() const template -void Bicgstab::apply_impl(const LinOp *b, LinOp *x) const +void Bicgstab::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -100,8 +100,8 @@ void Bicgstab::apply_impl(const LinOp *b, LinOp *x) const template void Bicgstab::apply_dense_impl( - const matrix::Dense *dense_b, - matrix::Dense *dense_x) const + const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -147,7 +147,7 @@ void Bicgstab::apply_dense_impl( system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, r.get()); rr->copy_from(r.get()); @@ -224,8 +224,8 @@ void Bicgstab::apply_dense_impl( template -void Bicgstab::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Bicgstab::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/bicgstab_kernels.hpp b/core/solver/bicgstab_kernels.hpp index 74b1208d4d7..136dcabc391 100644 --- a/core/solver/bicgstab_kernels.hpp +++ b/core/solver/bicgstab_kernels.hpp @@ -51,52 +51,52 @@ namespace bicgstab { #define GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL(_type) \ void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *r, \ - 
matrix::Dense<_type> *rr, matrix::Dense<_type> *y, \ - matrix::Dense<_type> *s, matrix::Dense<_type> *t, \ - matrix::Dense<_type> *z, matrix::Dense<_type> *v, \ - matrix::Dense<_type> *p, matrix::Dense<_type> *prev_rho, \ - matrix::Dense<_type> *rho, matrix::Dense<_type> *alpha, \ - matrix::Dense<_type> *beta, matrix::Dense<_type> *gamma, \ - matrix::Dense<_type> *omega, \ - Array *stop_status) + const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* rr, matrix::Dense<_type>* y, \ + matrix::Dense<_type>* s, matrix::Dense<_type>* t, \ + matrix::Dense<_type>* z, matrix::Dense<_type>* v, \ + matrix::Dense<_type>* p, matrix::Dense<_type>* prev_rho, \ + matrix::Dense<_type>* rho, matrix::Dense<_type>* alpha, \ + matrix::Dense<_type>* beta, matrix::Dense<_type>* gamma, \ + matrix::Dense<_type>* omega, \ + Array* stop_status) #define GKO_DECLARE_BICGSTAB_STEP_1_KERNEL(_type) \ void step_1( \ std::shared_ptr exec, \ - const matrix::Dense<_type> *r, matrix::Dense<_type> *p, \ - const matrix::Dense<_type> *v, const matrix::Dense<_type> *rho, \ - const matrix::Dense<_type> *prev_rho, \ - const matrix::Dense<_type> *alpha, const matrix::Dense<_type> *omega, \ - const Array *stop_status) + const matrix::Dense<_type>* r, matrix::Dense<_type>* p, \ + const matrix::Dense<_type>* v, const matrix::Dense<_type>* rho, \ + const matrix::Dense<_type>* prev_rho, \ + const matrix::Dense<_type>* alpha, const matrix::Dense<_type>* omega, \ + const Array* stop_status) #define GKO_DECLARE_BICGSTAB_STEP_2_KERNEL(_type) \ void step_2(std::shared_ptr exec, \ - const matrix::Dense<_type> *r, matrix::Dense<_type> *s, \ - const matrix::Dense<_type> *v, \ - const matrix::Dense<_type> *rho, matrix::Dense<_type> *alpha, \ - const matrix::Dense<_type> *beta, \ - const Array *stop_status) + const matrix::Dense<_type>* r, matrix::Dense<_type>* s, \ + const matrix::Dense<_type>* v, \ + const matrix::Dense<_type>* rho, matrix::Dense<_type>* alpha, \ + const matrix::Dense<_type>* 
beta, \ + const Array* stop_status) #define GKO_DECLARE_BICGSTAB_STEP_3_KERNEL(_type) \ void step_3( \ - std::shared_ptr exec, matrix::Dense<_type> *x, \ - matrix::Dense<_type> *r, const matrix::Dense<_type> *s, \ - const matrix::Dense<_type> *t, const matrix::Dense<_type> *y, \ - const matrix::Dense<_type> *z, const matrix::Dense<_type> *alpha, \ - const matrix::Dense<_type> *beta, const matrix::Dense<_type> *gamma, \ - matrix::Dense<_type> *omega, \ - const Array *stop_status) + std::shared_ptr exec, matrix::Dense<_type>* x, \ + matrix::Dense<_type>* r, const matrix::Dense<_type>* s, \ + const matrix::Dense<_type>* t, const matrix::Dense<_type>* y, \ + const matrix::Dense<_type>* z, const matrix::Dense<_type>* alpha, \ + const matrix::Dense<_type>* beta, const matrix::Dense<_type>* gamma, \ + matrix::Dense<_type>* omega, \ + const Array* stop_status) #define GKO_DECLARE_BICGSTAB_FINALIZE_KERNEL(_type) \ void finalize(std::shared_ptr exec, \ - matrix::Dense<_type> *x, const matrix::Dense<_type> *y, \ - const matrix::Dense<_type> *alpha, \ - Array *stop_status) + matrix::Dense<_type>* x, const matrix::Dense<_type>* y, \ + const matrix::Dense<_type>* alpha, \ + Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/cb_gmres.cpp b/core/solver/cb_gmres.cpp index 104d77ccbbb..00339ff028b 100644 --- a/core/solver/cb_gmres.cpp +++ b/core/solver/cb_gmres.cpp @@ -187,7 +187,7 @@ struct helper> { template -void CbGmres::apply_impl(const LinOp *b, LinOp *x) const +void CbGmres::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -199,8 +199,8 @@ void CbGmres::apply_impl(const LinOp *b, LinOp *x) const template void CbGmres::apply_dense_impl( - const matrix::Dense *dense_b, - matrix::Dense *dense_x) const + const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { // Current workaround to get a lambda with a template argument (only // the type of `value` matters, the content 
does not) @@ -299,8 +299,8 @@ void CbGmres::apply_dense_impl( auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), - dense_x, residual.get()); + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, + residual.get()); int total_iter = -1; size_type restart_iter = 0; @@ -504,8 +504,8 @@ void CbGmres::apply_dense_impl( template -void CbGmres::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void CbGmres::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/cb_gmres_accessor.hpp b/core/solver/cb_gmres_accessor.hpp index 5c2e9329b99..c03717169ac 100644 --- a/core/solver/cb_gmres_accessor.hpp +++ b/core/solver/cb_gmres_accessor.hpp @@ -114,7 +114,7 @@ class Range3dHelper { return Range(krylov_dim_, bases_.get_data(), scale_.get_data()); } - gko::Array &get_bases() { return bases_; } + gko::Array& get_bases() { return bases_; } private: std::array krylov_dim_; @@ -139,7 +139,7 @@ class Range3dHelper { Range get_range() { return Range(krylov_dim_, bases_.get_data()); } - gko::Array &get_bases() { return bases_; } + gko::Array& get_bases() { return bases_; } private: std::array krylov_dim_; diff --git a/core/solver/cb_gmres_kernels.hpp b/core/solver/cb_gmres_kernels.hpp index 1a7a8765d8d..18b942476fb 100644 --- a/core/solver/cb_gmres_kernels.hpp +++ b/core/solver/cb_gmres_kernels.hpp @@ -127,43 +127,43 @@ namespace kernels { #define GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL(_type) \ void initialize_1( \ std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *residual, \ - matrix::Dense<_type> *givens_sin, matrix::Dense<_type> *givens_cos, \ - Array *stop_status, size_type krylov_dim) + const matrix::Dense<_type>* b, matrix::Dense<_type>* residual, \ + matrix::Dense<_type>* givens_sin, 
matrix::Dense<_type>* givens_cos, \ + Array* stop_status, size_type krylov_dim) #define GKO_DECLARE_CB_GMRES_INITIALIZE_2_KERNEL(_type1, _range) \ void initialize_2(std::shared_ptr exec, \ - const matrix::Dense<_type1> *residual, \ - matrix::Dense> *residual_norm, \ - matrix::Dense<_type1> *residual_norm_collection, \ - matrix::Dense> *arnoldi_norm, \ + const matrix::Dense<_type1>* residual, \ + matrix::Dense>* residual_norm, \ + matrix::Dense<_type1>* residual_norm_collection, \ + matrix::Dense>* arnoldi_norm, \ _range krylov_bases, \ - matrix::Dense<_type1> *next_krylov_basis, \ - Array *final_iter_nums, size_type krylov_dim) + matrix::Dense<_type1>* next_krylov_basis, \ + Array* final_iter_nums, size_type krylov_dim) #define GKO_DECLARE_CB_GMRES_STEP_1_KERNEL(_type1, _range) \ void step_1( \ std::shared_ptr exec, \ - matrix::Dense<_type1> *next_krylov_basis, \ - matrix::Dense<_type1> *givens_sin, matrix::Dense<_type1> *givens_cos, \ - matrix::Dense> *residual_norm, \ - matrix::Dense<_type1> *residual_norm_collection, _range krylov_bases, \ - matrix::Dense<_type1> *hessenberg_iter, \ - matrix::Dense<_type1> *buffer_iter, \ - matrix::Dense> *arnoldi_norm, size_type iter, \ - Array *final_iter_nums, \ - const Array *stop_status, \ - Array *reorth_status, Array *num_reorth) + matrix::Dense<_type1>* next_krylov_basis, \ + matrix::Dense<_type1>* givens_sin, matrix::Dense<_type1>* givens_cos, \ + matrix::Dense>* residual_norm, \ + matrix::Dense<_type1>* residual_norm_collection, _range krylov_bases, \ + matrix::Dense<_type1>* hessenberg_iter, \ + matrix::Dense<_type1>* buffer_iter, \ + matrix::Dense>* arnoldi_norm, size_type iter, \ + Array* final_iter_nums, \ + const Array* stop_status, \ + Array* reorth_status, Array* num_reorth) #define GKO_DECLARE_CB_GMRES_STEP_2_KERNEL(_type1, _range) \ void step_2(std::shared_ptr exec, \ - const matrix::Dense<_type1> *residual_norm_collection, \ - _range krylov_bases, const matrix::Dense<_type1> *hessenberg, \ - 
matrix::Dense<_type1> *y, \ - matrix::Dense<_type1> *before_preconditioner, \ - const Array *final_iter_nums) + const matrix::Dense<_type1>* residual_norm_collection, \ + _range krylov_bases, const matrix::Dense<_type1>* hessenberg, \ + matrix::Dense<_type1>* y, \ + matrix::Dense<_type1>* before_preconditioner, \ + const Array* final_iter_nums) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/cg.cpp b/core/solver/cg.cpp index ae0fb37f2a9..d2257ba9b24 100644 --- a/core/solver/cg.cpp +++ b/core/solver/cg.cpp @@ -87,7 +87,7 @@ std::unique_ptr Cg::conj_transpose() const template -void Cg::apply_impl(const LinOp *b, LinOp *x) const +void Cg::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -98,8 +98,8 @@ void Cg::apply_impl(const LinOp *b, LinOp *x) const template -void Cg::apply_dense_impl(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Cg::apply_dense_impl(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -136,7 +136,7 @@ void Cg::apply_dense_impl(const matrix::Dense *dense_b, system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, r.get()); int iter = -1; @@ -182,8 +182,8 @@ void Cg::apply_dense_impl(const matrix::Dense *dense_b, template -void Cg::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Cg::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/cg_kernels.hpp b/core/solver/cg_kernels.hpp index 2cfa476c744..a455348fae4 100644 --- a/core/solver/cg_kernels.hpp +++ b/core/solver/cg_kernels.hpp 
@@ -50,28 +50,28 @@ namespace cg { #define GKO_DECLARE_CG_INITIALIZE_KERNEL(_type) \ void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *r, \ - matrix::Dense<_type> *z, matrix::Dense<_type> *p, \ - matrix::Dense<_type> *q, matrix::Dense<_type> *prev_rho, \ - matrix::Dense<_type> *rho, \ - Array *stop_status) + const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* z, matrix::Dense<_type>* p, \ + matrix::Dense<_type>* q, matrix::Dense<_type>* prev_rho, \ + matrix::Dense<_type>* rho, \ + Array* stop_status) #define GKO_DECLARE_CG_STEP_1_KERNEL(_type) \ void step_1(std::shared_ptr exec, \ - matrix::Dense<_type> *p, const matrix::Dense<_type> *z, \ - const matrix::Dense<_type> *rho, \ - const matrix::Dense<_type> *prev_rho, \ - const Array *stop_status) + matrix::Dense<_type>* p, const matrix::Dense<_type>* z, \ + const matrix::Dense<_type>* rho, \ + const matrix::Dense<_type>* prev_rho, \ + const Array* stop_status) #define GKO_DECLARE_CG_STEP_2_KERNEL(_type) \ void step_2(std::shared_ptr exec, \ - matrix::Dense<_type> *x, matrix::Dense<_type> *r, \ - const matrix::Dense<_type> *p, const matrix::Dense<_type> *q, \ - const matrix::Dense<_type> *beta, \ - const matrix::Dense<_type> *rho, \ - const Array *stop_status) + matrix::Dense<_type>* x, matrix::Dense<_type>* r, \ + const matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ + const matrix::Dense<_type>* beta, \ + const matrix::Dense<_type>* rho, \ + const Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/cgs.cpp b/core/solver/cgs.cpp index 16589ffbeb3..340fc1f2a50 100644 --- a/core/solver/cgs.cpp +++ b/core/solver/cgs.cpp @@ -87,7 +87,7 @@ std::unique_ptr Cgs::conj_transpose() const template -void Cgs::apply_impl(const LinOp *b, LinOp *x) const +void Cgs::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -98,8 +98,8 @@ void 
Cgs::apply_impl(const LinOp *b, LinOp *x) const template -void Cgs::apply_dense_impl(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Cgs::apply_dense_impl(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -145,7 +145,7 @@ void Cgs::apply_dense_impl(const matrix::Dense *dense_b, system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, r.get()); r_tld->copy_from(r.get()); @@ -204,8 +204,8 @@ void Cgs::apply_dense_impl(const matrix::Dense *dense_b, template -void Cgs::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Cgs::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/cgs_kernels.hpp b/core/solver/cgs_kernels.hpp index 3c10d782062..5f977e672f8 100644 --- a/core/solver/cgs_kernels.hpp +++ b/core/solver/cgs_kernels.hpp @@ -51,41 +51,41 @@ namespace cgs { #define GKO_DECLARE_CGS_INITIALIZE_KERNEL(_type) \ void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *r, \ - matrix::Dense<_type> *r_tld, matrix::Dense<_type> *p, \ - matrix::Dense<_type> *q, matrix::Dense<_type> *u, \ - matrix::Dense<_type> *u_hat, matrix::Dense<_type> *v_hat, \ - matrix::Dense<_type> *t, matrix::Dense<_type> *alpha, \ - matrix::Dense<_type> *beta, matrix::Dense<_type> *gamma, \ - matrix::Dense<_type> *prev_rho, matrix::Dense<_type> *rho, \ - Array *stop_status) + const matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* r_tld, matrix::Dense<_type>* p, \ + matrix::Dense<_type>* q, matrix::Dense<_type>* u, \ + matrix::Dense<_type>* u_hat, 
matrix::Dense<_type>* v_hat, \ + matrix::Dense<_type>* t, matrix::Dense<_type>* alpha, \ + matrix::Dense<_type>* beta, matrix::Dense<_type>* gamma, \ + matrix::Dense<_type>* prev_rho, matrix::Dense<_type>* rho, \ + Array* stop_status) #define GKO_DECLARE_CGS_STEP_1_KERNEL(_type) \ void step_1(std::shared_ptr exec, \ - const matrix::Dense<_type> *r, matrix::Dense<_type> *u, \ - matrix::Dense<_type> *p, const matrix::Dense<_type> *q, \ - matrix::Dense<_type> *beta, const matrix::Dense<_type> *rho, \ - const matrix::Dense<_type> *rho_prev, \ - const Array *stop_status) + const matrix::Dense<_type>* r, matrix::Dense<_type>* u, \ + matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ + matrix::Dense<_type>* beta, const matrix::Dense<_type>* rho, \ + const matrix::Dense<_type>* rho_prev, \ + const Array* stop_status) #define GKO_DECLARE_CGS_STEP_2_KERNEL(_type) \ void step_2(std::shared_ptr exec, \ - const matrix::Dense<_type> *u, \ - const matrix::Dense<_type> *v_hat, matrix::Dense<_type> *q, \ - matrix::Dense<_type> *t, matrix::Dense<_type> *alpha, \ - const matrix::Dense<_type> *rho, \ - const matrix::Dense<_type> *gamma, \ - const Array *stop_status) + const matrix::Dense<_type>* u, \ + const matrix::Dense<_type>* v_hat, matrix::Dense<_type>* q, \ + matrix::Dense<_type>* t, matrix::Dense<_type>* alpha, \ + const matrix::Dense<_type>* rho, \ + const matrix::Dense<_type>* gamma, \ + const Array* stop_status) #define GKO_DECLARE_CGS_STEP_3_KERNEL(_type) \ void step_3(std::shared_ptr exec, \ - const matrix::Dense<_type> *t, \ - const matrix::Dense<_type> *u_hat, matrix::Dense<_type> *r, \ - matrix::Dense<_type> *x, const matrix::Dense<_type> *alpha, \ - const Array *stop_status) + const matrix::Dense<_type>* t, \ + const matrix::Dense<_type>* u_hat, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* x, const matrix::Dense<_type>* alpha, \ + const Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/fcg.cpp b/core/solver/fcg.cpp index 
fc081a21b23..d294ff57dd3 100644 --- a/core/solver/fcg.cpp +++ b/core/solver/fcg.cpp @@ -86,7 +86,7 @@ std::unique_ptr Fcg::conj_transpose() const template -void Fcg::apply_impl(const LinOp *b, LinOp *x) const +void Fcg::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -97,8 +97,8 @@ void Fcg::apply_impl(const LinOp *b, LinOp *x) const template -void Fcg::apply_dense_impl(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Fcg::apply_dense_impl(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -140,7 +140,7 @@ void Fcg::apply_dense_impl(const matrix::Dense *dense_b, system_matrix_->apply(neg_one_op.get(), dense_x, one_op.get(), r.get()); auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, r.get()); int iter = -1; @@ -189,8 +189,8 @@ void Fcg::apply_dense_impl(const matrix::Dense *dense_b, template -void Fcg::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Fcg::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/fcg_kernels.hpp b/core/solver/fcg_kernels.hpp index 04c385f0e3e..0ac74f78287 100644 --- a/core/solver/fcg_kernels.hpp +++ b/core/solver/fcg_kernels.hpp @@ -50,29 +50,29 @@ namespace fcg { #define GKO_DECLARE_FCG_INITIALIZE_KERNEL(_type) \ void initialize(std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *r, \ - matrix::Dense<_type> *z, matrix::Dense<_type> *p, \ - matrix::Dense<_type> *q, matrix::Dense<_type> *t, \ - matrix::Dense<_type> *prev_rho, matrix::Dense<_type> *rho, \ - matrix::Dense<_type> *rho_t, \ - Array *stop_status) + const 
matrix::Dense<_type>* b, matrix::Dense<_type>* r, \ + matrix::Dense<_type>* z, matrix::Dense<_type>* p, \ + matrix::Dense<_type>* q, matrix::Dense<_type>* t, \ + matrix::Dense<_type>* prev_rho, matrix::Dense<_type>* rho, \ + matrix::Dense<_type>* rho_t, \ + Array* stop_status) #define GKO_DECLARE_FCG_STEP_1_KERNEL(_type) \ void step_1(std::shared_ptr exec, \ - matrix::Dense<_type> *p, const matrix::Dense<_type> *z, \ - const matrix::Dense<_type> *rho_t, \ - const matrix::Dense<_type> *prev_rho, \ - const Array *stop_status) + matrix::Dense<_type>* p, const matrix::Dense<_type>* z, \ + const matrix::Dense<_type>* rho_t, \ + const matrix::Dense<_type>* prev_rho, \ + const Array* stop_status) #define GKO_DECLARE_FCG_STEP_2_KERNEL(_type) \ void step_2( \ - std::shared_ptr exec, matrix::Dense<_type> *x, \ - matrix::Dense<_type> *r, matrix::Dense<_type> *t, \ - const matrix::Dense<_type> *p, const matrix::Dense<_type> *q, \ - const matrix::Dense<_type> *beta, const matrix::Dense<_type> *rho, \ - const Array *stop_status) + std::shared_ptr exec, matrix::Dense<_type>* x, \ + matrix::Dense<_type>* r, matrix::Dense<_type>* t, \ + const matrix::Dense<_type>* p, const matrix::Dense<_type>* q, \ + const matrix::Dense<_type>* beta, const matrix::Dense<_type>* rho, \ + const Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index f5e933631c8..c98e482da44 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -93,7 +93,7 @@ std::unique_ptr Gmres::conj_transpose() const template -void Gmres::apply_impl(const LinOp *b, LinOp *x) const +void Gmres::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -104,8 +104,8 @@ void Gmres::apply_impl(const LinOp *b, LinOp *x) const template -void Gmres::apply_dense_impl(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Gmres::apply_dense_impl(const matrix::Dense* dense_b, + 
matrix::Dense* dense_x) const { using Vector = matrix::Dense; using NormVector = matrix::Dense>; @@ -161,7 +161,7 @@ void Gmres::apply_dense_impl(const matrix::Dense *dense_b, auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, residual.get()); int total_iter = -1; @@ -324,8 +324,8 @@ void Gmres::apply_dense_impl(const matrix::Dense *dense_b, template -void Gmres::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Gmres::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/gmres_kernels.hpp b/core/solver/gmres_kernels.hpp index 81ca349a99e..f6651564185 100644 --- a/core/solver/gmres_kernels.hpp +++ b/core/solver/gmres_kernels.hpp @@ -49,40 +49,40 @@ namespace gmres { #define GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL(_type) \ void initialize_1( \ std::shared_ptr exec, \ - const matrix::Dense<_type> *b, matrix::Dense<_type> *residual, \ - matrix::Dense<_type> *givens_sin, matrix::Dense<_type> *givens_cos, \ - Array *stop_status, size_type krylov_dim) + const matrix::Dense<_type>* b, matrix::Dense<_type>* residual, \ + matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ + Array* stop_status, size_type krylov_dim) #define GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL(_type) \ void initialize_2(std::shared_ptr exec, \ - const matrix::Dense<_type> *residual, \ - matrix::Dense> *residual_norm, \ - matrix::Dense<_type> *residual_norm_collection, \ - matrix::Dense<_type> *krylov_bases, \ - Array *final_iter_nums, size_type krylov_dim) + const matrix::Dense<_type>* residual, \ + matrix::Dense>* residual_norm, \ + matrix::Dense<_type>* residual_norm_collection, \ + matrix::Dense<_type>* krylov_bases, \ + Array* final_iter_nums, 
size_type krylov_dim) #define GKO_DECLARE_GMRES_STEP_1_KERNEL(_type) \ void step_1(std::shared_ptr exec, \ - size_type num_rows, matrix::Dense<_type> *givens_sin, \ - matrix::Dense<_type> *givens_cos, \ - matrix::Dense> *residual_norm, \ - matrix::Dense<_type> *residual_norm_collection, \ - matrix::Dense<_type> *krylov_bases, \ - matrix::Dense<_type> *hessenberg_iter, size_type iter, \ - Array *final_iter_nums, \ - const Array *stop_status) + size_type num_rows, matrix::Dense<_type>* givens_sin, \ + matrix::Dense<_type>* givens_cos, \ + matrix::Dense>* residual_norm, \ + matrix::Dense<_type>* residual_norm_collection, \ + matrix::Dense<_type>* krylov_bases, \ + matrix::Dense<_type>* hessenberg_iter, size_type iter, \ + Array* final_iter_nums, \ + const Array* stop_status) #define GKO_DECLARE_GMRES_STEP_2_KERNEL(_type) \ void step_2(std::shared_ptr exec, \ - const matrix::Dense<_type> *residual_norm_collection, \ - const matrix::Dense<_type> *krylov_bases, \ - const matrix::Dense<_type> *hessenberg, \ - matrix::Dense<_type> *y, \ - matrix::Dense<_type> *before_preconditioner, \ - const Array *final_iter_nums) + const matrix::Dense<_type>* residual_norm_collection, \ + const matrix::Dense<_type>* krylov_bases, \ + const matrix::Dense<_type>* hessenberg, \ + matrix::Dense<_type>* y, \ + matrix::Dense<_type>* before_preconditioner, \ + const Array* final_iter_nums) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/idr.cpp b/core/solver/idr.cpp index d3190414678..76c6858fcc4 100644 --- a/core/solver/idr.cpp +++ b/core/solver/idr.cpp @@ -91,8 +91,8 @@ std::unique_ptr Idr::conj_transpose() const template template -void Idr::iterate(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Idr::iterate(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using std::swap; using Vector = matrix::Dense; @@ -167,7 +167,7 @@ void Idr::iterate(const matrix::Dense *dense_b, auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, 
- std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, residual.get()); int total_iter = -1; @@ -271,7 +271,7 @@ void Idr::iterate(const matrix::Dense *dense_b, template -void Idr::apply_impl(const LinOp *b, LinOp *x) const +void Idr::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -282,7 +282,7 @@ void Idr::apply_impl(const LinOp *b, LinOp *x) const auto complex_x = dense_x->make_complex(); this->iterate(complex_b.get(), complex_x.get()); complex_x->get_real( - dynamic_cast> *>( + dynamic_cast>*>( dense_x)); } else { this->iterate(dense_b, dense_x); @@ -293,8 +293,8 @@ void Idr::apply_impl(const LinOp *b, LinOp *x) const template -void Idr::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Idr::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/idr_kernels.hpp b/core/solver/idr_kernels.hpp index 6a6e43f45ef..8812d23ddb6 100644 --- a/core/solver/idr_kernels.hpp +++ b/core/solver/idr_kernels.hpp @@ -48,47 +48,47 @@ namespace idr { #define GKO_DECLARE_IDR_INITIALIZE_KERNEL(_type) \ void initialize(std::shared_ptr exec, \ - const size_type nrhs, matrix::Dense<_type> *m, \ - matrix::Dense<_type> *subspace_vectors, \ - bool deterministic, Array *stop_status) + const size_type nrhs, matrix::Dense<_type>* m, \ + matrix::Dense<_type>* subspace_vectors, \ + bool deterministic, Array* stop_status) #define GKO_DECLARE_IDR_STEP_1_KERNEL(_type) \ void step_1( \ std::shared_ptr exec, const size_type nrhs, \ - const size_type k, const matrix::Dense<_type> *m, \ - const matrix::Dense<_type> *f, const matrix::Dense<_type> *residual, \ - const matrix::Dense<_type> *g, matrix::Dense<_type> *c, \ - matrix::Dense<_type> *v, const Array *stop_status) + 
const size_type k, const matrix::Dense<_type>* m, \ + const matrix::Dense<_type>* f, const matrix::Dense<_type>* residual, \ + const matrix::Dense<_type>* g, matrix::Dense<_type>* c, \ + matrix::Dense<_type>* v, const Array* stop_status) #define GKO_DECLARE_IDR_STEP_2_KERNEL(_type) \ void step_2(std::shared_ptr exec, \ const size_type nrhs, const size_type k, \ - const matrix::Dense<_type> *omega, \ - const matrix::Dense<_type> *preconditioned_vector, \ - const matrix::Dense<_type> *c, matrix::Dense<_type> *u, \ - const Array *stop_status) + const matrix::Dense<_type>* omega, \ + const matrix::Dense<_type>* preconditioned_vector, \ + const matrix::Dense<_type>* c, matrix::Dense<_type>* u, \ + const Array* stop_status) #define GKO_DECLARE_IDR_STEP_3_KERNEL(_type) \ void step_3(std::shared_ptr exec, \ const size_type nrhs, const size_type k, \ - const matrix::Dense<_type> *p, matrix::Dense<_type> *g, \ - matrix::Dense<_type> *g_k, matrix::Dense<_type> *u, \ - matrix::Dense<_type> *m, matrix::Dense<_type> *f, \ - matrix::Dense<_type> *alpha, matrix::Dense<_type> *residual, \ - matrix::Dense<_type> *x, \ - const Array *stop_status) + const matrix::Dense<_type>* p, matrix::Dense<_type>* g, \ + matrix::Dense<_type>* g_k, matrix::Dense<_type>* u, \ + matrix::Dense<_type>* m, matrix::Dense<_type>* f, \ + matrix::Dense<_type>* alpha, matrix::Dense<_type>* residual, \ + matrix::Dense<_type>* x, \ + const Array* stop_status) #define GKO_DECLARE_IDR_COMPUTE_OMEGA_KERNEL(_type) \ void compute_omega( \ std::shared_ptr exec, const size_type nrhs, \ - const remove_complex<_type> kappa, const matrix::Dense<_type> *tht, \ - const matrix::Dense> *residual_norm, \ - matrix::Dense<_type> *omega, \ - const Array *stop_status) + const remove_complex<_type> kappa, const matrix::Dense<_type>* tht, \ + const matrix::Dense>* residual_norm, \ + matrix::Dense<_type>* omega, \ + const Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/ir.cpp b/core/solver/ir.cpp 
index 9b61d78cb33..15882c11155 100644 --- a/core/solver/ir.cpp +++ b/core/solver/ir.cpp @@ -82,7 +82,7 @@ std::unique_ptr Ir::conj_transpose() const template -void Ir::apply_impl(const LinOp *b, LinOp *x) const +void Ir::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -93,8 +93,8 @@ void Ir::apply_impl(const LinOp *b, LinOp *x) const template -void Ir::apply_dense_impl(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const +void Ir::apply_dense_impl(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const { using Vector = matrix::Dense; constexpr uint8 relative_stopping_id{1}; @@ -116,7 +116,7 @@ void Ir::apply_dense_impl(const matrix::Dense *dense_b, auto stop_criterion = stop_criterion_factory_->generate( system_matrix_, - std::shared_ptr(dense_b, [](const LinOp *) {}), dense_x, + std::shared_ptr(dense_b, [](const LinOp*) {}), dense_x, lend(residual)); int iter = -1; @@ -163,8 +163,8 @@ void Ir::apply_dense_impl(const matrix::Dense *dense_b, template -void Ir::apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const +void Ir::apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/ir_kernels.hpp b/core/solver/ir_kernels.hpp index 2f96a445e3d..afded9d369b 100644 --- a/core/solver/ir_kernels.hpp +++ b/core/solver/ir_kernels.hpp @@ -50,7 +50,7 @@ namespace ir { #define GKO_DECLARE_IR_INITIALIZE_KERNEL \ void initialize(std::shared_ptr exec, \ - Array *stop_status) + Array* stop_status) #define GKO_DECLARE_ALL_AS_TEMPLATES GKO_DECLARE_IR_INITIALIZE_KERNEL diff --git a/core/solver/lower_trs.cpp b/core/solver/lower_trs.cpp index cbe5f332f9a..7ae9e1b4858 100644 --- a/core/solver/lower_trs.cpp +++ b/core/solver/lower_trs.cpp @@ -102,7 +102,7 @@ void LowerTrs::generate() template -void 
LowerTrs::apply_impl(const LinOp *b, LinOp *x) const +void LowerTrs::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -138,10 +138,10 @@ void LowerTrs::apply_impl(const LinOp *b, LinOp *x) const template -void LowerTrs::apply_impl(const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - LinOp *x) const +void LowerTrs::apply_impl(const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/lower_trs_kernels.hpp b/core/solver/lower_trs_kernels.hpp index 5f9f272417a..fabb3463e05 100644 --- a/core/solver/lower_trs_kernels.hpp +++ b/core/solver/lower_trs_kernels.hpp @@ -52,27 +52,27 @@ namespace lower_trs { #define GKO_DECLARE_LOWER_TRS_SHOULD_PERFORM_TRANSPOSE_KERNEL() \ void should_perform_transpose(std::shared_ptr exec, \ - bool &do_transpose) + bool& do_transpose) #define GKO_DECLARE_LOWER_TRS_INIT_STRUCT_KERNEL() \ void init_struct(std::shared_ptr exec, \ - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) #define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(_vtype, _itype) \ void generate(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype> *matrix, \ - solver::SolveStruct *solve_struct, \ + const matrix::Csr<_vtype, _itype>* matrix, \ + solver::SolveStruct* solve_struct, \ const gko::size_type num_rhs) #define GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(_vtype, _itype) \ void solve(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype> *matrix, \ - const solver::SolveStruct *solve_struct, \ - matrix::Dense<_vtype> *trans_b, matrix::Dense<_vtype> *trans_x, \ - const matrix::Dense<_vtype> *b, matrix::Dense<_vtype> *x) + const matrix::Csr<_vtype, _itype>* matrix, \ + const solver::SolveStruct* solve_struct, \ + matrix::Dense<_vtype>* trans_b, matrix::Dense<_vtype>* trans_x, \ + const matrix::Dense<_vtype>* b, matrix::Dense<_vtype>* x) 
#define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/solver/upper_trs.cpp b/core/solver/upper_trs.cpp index bae00182fc8..d109d2cbd65 100644 --- a/core/solver/upper_trs.cpp +++ b/core/solver/upper_trs.cpp @@ -102,7 +102,7 @@ void UpperTrs::generate() template -void UpperTrs::apply_impl(const LinOp *b, LinOp *x) const +void UpperTrs::apply_impl(const LinOp* b, LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_b, auto dense_x) { @@ -138,10 +138,10 @@ void UpperTrs::apply_impl(const LinOp *b, LinOp *x) const template -void UpperTrs::apply_impl(const LinOp *alpha, - const LinOp *b, - const LinOp *beta, - LinOp *x) const +void UpperTrs::apply_impl(const LinOp* alpha, + const LinOp* b, + const LinOp* beta, + LinOp* x) const { precision_dispatch_real_complex( [this](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { diff --git a/core/solver/upper_trs_kernels.hpp b/core/solver/upper_trs_kernels.hpp index bdbc4a9b1d7..f9f3facd924 100644 --- a/core/solver/upper_trs_kernels.hpp +++ b/core/solver/upper_trs_kernels.hpp @@ -52,27 +52,27 @@ namespace upper_trs { #define GKO_DECLARE_UPPER_TRS_SHOULD_PERFORM_TRANSPOSE_KERNEL() \ void should_perform_transpose(std::shared_ptr exec, \ - bool &do_transpose) + bool& do_transpose) #define GKO_DECLARE_UPPER_TRS_INIT_STRUCT_KERNEL() \ void init_struct(std::shared_ptr exec, \ - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) #define GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL(_vtype, _itype) \ void generate(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype> *matrix, \ - solver::SolveStruct *solve_struct, \ + const matrix::Csr<_vtype, _itype>* matrix, \ + solver::SolveStruct* solve_struct, \ const gko::size_type num_rhs) #define GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL(_vtype, _itype) \ void solve(std::shared_ptr exec, \ - const matrix::Csr<_vtype, _itype> *matrix, \ - const solver::SolveStruct *solve_struct, \ - matrix::Dense<_vtype> *trans_b, matrix::Dense<_vtype> *trans_x, \ - const 
matrix::Dense<_vtype> *b, matrix::Dense<_vtype> *x) + const matrix::Csr<_vtype, _itype>* matrix, \ + const solver::SolveStruct* solve_struct, \ + matrix::Dense<_vtype>* trans_b, matrix::Dense<_vtype>* trans_x, \ + const matrix::Dense<_vtype>* b, matrix::Dense<_vtype>* x) #define GKO_DECLARE_ALL_AS_TEMPLATES \ diff --git a/core/stop/combined.cpp b/core/stop/combined.cpp index 898c6ea56d9..2250c81753f 100644 --- a/core/stop/combined.cpp +++ b/core/stop/combined.cpp @@ -38,13 +38,13 @@ namespace stop { bool Combined::check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, - bool *one_changed, const Updater &updater) + Array* stop_status, + bool* one_changed, const Updater& updater) { bool one_converged = false; gko::uint8 ids{1}; *one_changed = false; - for (auto &c : criteria_) { + for (auto& c : criteria_) { bool local_one_changed = false; one_converged |= c->check(ids, setFinalized, stop_status, &local_one_changed, updater); diff --git a/core/stop/criterion.cpp b/core/stop/criterion.cpp index ca902912307..0be61aaf51c 100644 --- a/core/stop/criterion.cpp +++ b/core/stop/criterion.cpp @@ -50,7 +50,7 @@ GKO_REGISTER_OPERATION(set_all_statuses, set_all_statuses::set_all_statuses); void Criterion::set_all_statuses(uint8 stoppingId, bool setFinalized, - Array *stop_status) + Array* stop_status) { this->get_executor()->run(criterion::make_set_all_statuses( stoppingId, setFinalized, stop_status)); diff --git a/core/stop/criterion_kernels.hpp b/core/stop/criterion_kernels.hpp index 0844caba099..f3473d9c415 100644 --- a/core/stop/criterion_kernels.hpp +++ b/core/stop/criterion_kernels.hpp @@ -47,7 +47,7 @@ namespace set_all_statuses { #define GKO_DECLARE_SET_ALL_STATUSES_KERNEL() \ void set_all_statuses(std::shared_ptr exec, \ uint8 stoppingId, bool setFinalized, \ - Array *stop_status) + Array* stop_status) } // namespace set_all_statuses diff --git a/core/stop/iteration.cpp b/core/stop/iteration.cpp index dabe099d112..dc2bf969e3c 100644 --- 
a/core/stop/iteration.cpp +++ b/core/stop/iteration.cpp @@ -38,8 +38,8 @@ namespace stop { bool Iteration::check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, - bool *one_changed, const Updater &updater) + Array* stop_status, + bool* one_changed, const Updater& updater) { bool result = updater.num_iterations_ >= parameters_.max_iters; if (result) { diff --git a/core/stop/residual_norm.cpp b/core/stop/residual_norm.cpp index bd31e67caef..f36f1a24706 100644 --- a/core/stop/residual_norm.cpp +++ b/core/stop/residual_norm.cpp @@ -64,18 +64,18 @@ GKO_REGISTER_OPERATION(implicit_residual_norm, template bool ResidualNormBase::check_impl( - uint8 stopping_id, bool set_finalized, Array *stop_status, - bool *one_changed, const Criterion::Updater &updater) + uint8 stopping_id, bool set_finalized, Array* stop_status, + bool* one_changed, const Criterion::Updater& updater) { - const NormVector *dense_tau; + const NormVector* dense_tau; if (updater.residual_norm_ != nullptr) { dense_tau = as(updater.residual_norm_); } else if (updater.residual_ != nullptr) { - if (dynamic_cast(updater.residual_)) { - auto *dense_r = as(updater.residual_); + if (dynamic_cast(updater.residual_)) { + auto* dense_r = as(updater.residual_); dense_r->compute_norm2(u_dense_tau_.get()); } else { - auto *dense_r = as(updater.residual_); + auto* dense_r = as(updater.residual_); dense_r->compute_norm2(u_dense_tau_.get()); } dense_tau = u_dense_tau_.get(); @@ -115,10 +115,10 @@ bool ResidualNormBase::check_impl( template bool ImplicitResidualNorm::check_impl( - uint8 stopping_id, bool set_finalized, Array *stop_status, - bool *one_changed, const Criterion::Updater &updater) + uint8 stopping_id, bool set_finalized, Array* stop_status, + bool* one_changed, const Criterion::Updater& updater) { - const Vector *dense_tau; + const Vector* dense_tau; if (updater.implicit_sq_residual_norm_ != nullptr) { dense_tau = as(updater.implicit_sq_residual_norm_); } else { diff --git 
a/core/stop/residual_norm_kernels.hpp b/core/stop/residual_norm_kernels.hpp index cb0496b5aef..7805fbf4238 100644 --- a/core/stop/residual_norm_kernels.hpp +++ b/core/stop/residual_norm_kernels.hpp @@ -49,10 +49,10 @@ namespace residual_norm { #define GKO_DECLARE_RESIDUAL_NORM_KERNEL(_type) \ void residual_norm( \ std::shared_ptr exec, \ - const matrix::Dense<_type> *tau, const matrix::Dense<_type> *orig_tau, \ + const matrix::Dense<_type>* tau, const matrix::Dense<_type>* orig_tau, \ _type rel_residual_goal, uint8 stoppingId, bool setFinalized, \ - Array *stop_status, Array *device_storage, \ - bool *all_converged, bool *one_changed) + Array* stop_status, Array* device_storage, \ + bool* all_converged, bool* one_changed) #define GKO_DECLARE_ALL_AS_TEMPLATES \ @@ -69,11 +69,11 @@ namespace implicit_residual_norm { #define GKO_DECLARE_IMPLICIT_RESIDUAL_NORM_KERNEL(_type) \ void implicit_residual_norm( \ std::shared_ptr exec, \ - const matrix::Dense<_type> *tau, \ - const matrix::Dense> *orig_tau, \ + const matrix::Dense<_type>* tau, \ + const matrix::Dense>* orig_tau, \ remove_complex<_type> rel_residual_goal, uint8 stoppingId, \ - bool setFinalized, Array *stop_status, \ - Array *device_storage, bool *all_converged, bool *one_changed) + bool setFinalized, Array* stop_status, \ + Array* device_storage, bool* all_converged, bool* one_changed) #define GKO_DECLARE_ALL_AS_TEMPLATES2 \ diff --git a/core/stop/time.cpp b/core/stop/time.cpp index f9f28b5365d..177e37be2c4 100644 --- a/core/stop/time.cpp +++ b/core/stop/time.cpp @@ -38,8 +38,8 @@ namespace stop { bool Time::check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, bool *one_changed, - const Updater &updater) + Array* stop_status, bool* one_changed, + const Updater& updater) { bool result = clock::now() - start_ >= time_limit_; if (result) { diff --git a/core/test/accessor/reduced_row_major_ginkgo.cpp b/core/test/accessor/reduced_row_major_ginkgo.cpp index c64c2f4cefe..ea4770db0d9 100644 --- 
a/core/test/accessor/reduced_row_major_ginkgo.cpp +++ b/core/test/accessor/reduced_row_major_ginkgo.cpp @@ -114,7 +114,7 @@ class ReducedStorage3d : public ::testing::Test { } template - void check_accessor_correctness(const Accessor &a, + void check_accessor_correctness(const Accessor& a, std::tuple ignore = t(99, 99, 99)) { diff --git a/core/test/accessor/scaled_reduced_row_major.cpp b/core/test/accessor/scaled_reduced_row_major.cpp index 2cf14d5732b..f002c00d2bf 100644 --- a/core/test/accessor/scaled_reduced_row_major.cpp +++ b/core/test/accessor/scaled_reduced_row_major.cpp @@ -91,7 +91,7 @@ class ScaledReducedStorage3d : public ::testing::Test { template static void check_accessor_correctness( - const Accessor &a, + const Accessor& a, std::tuple ignore = std::tuple(99, 99, 99)) { diff --git a/core/test/base/abstract_factory.cpp b/core/test/base/abstract_factory.cpp index 15ee287d742..afa9ffad7a2 100644 --- a/core/test/base/abstract_factory.cpp +++ b/core/test/base/abstract_factory.cpp @@ -61,7 +61,7 @@ struct IntFactory }; struct MyInt { - MyInt(const IntFactory *factory, int orig_value) + MyInt(const IntFactory* factory, int orig_value) : value{orig_value * factory->get_parameters().coefficient} {} int value; diff --git a/core/test/base/allocator.cpp b/core/test/base/allocator.cpp index 3d09052487f..6ffc0800b7e 100644 --- a/core/test/base/allocator.cpp +++ b/core/test/base/allocator.cpp @@ -47,7 +47,7 @@ TEST(ExecutorAllocator, Works) auto exec = gko::ReferenceExecutor::create(); auto alloc = gko::ExecutorAllocator(exec); - int *ptr{}; + int* ptr{}; ASSERT_NO_THROW(ptr = alloc.allocate(10)); // This test can only fail with sanitizers ptr[0] = 0; diff --git a/core/test/base/array.cpp b/core/test/base/array.cpp index 708bfcf5dad..24fe9ed52c8 100644 --- a/core/test/base/array.cpp +++ b/core/test/base/array.cpp @@ -57,7 +57,7 @@ class Array : public ::testing::Test { x.get_data()[1] = 2; } - static void assert_equal_to_original_x(gko::Array &a, + static void 
assert_equal_to_original_x(gko::Array& a, bool check_zero = true) { ASSERT_EQ(a.get_num_elems(), 2); @@ -366,7 +366,7 @@ TYPED_TEST(Array, ViewCannotBeResized) template class my_null_deleter { public: - using pointer = T *; + using pointer = T*; void operator()(pointer) const noexcept {} }; diff --git a/core/test/base/combination.cpp b/core/test/base/combination.cpp index de24b3eba99..5571350276c 100644 --- a/core/test/base/combination.cpp +++ b/core/test/base/combination.cpp @@ -50,10 +50,10 @@ struct DummyOperator : public gko::EnableLinOp { : gko::EnableLinOp(exec, gko::dim<2>{1, 1}) {} - void apply_impl(const LinOp *b, LinOp *x) const override {} + void apply_impl(const LinOp* b, LinOp* x) const override {} - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override {} }; diff --git a/core/test/base/composition.cpp b/core/test/base/composition.cpp index 628be53b08c..fbd39d8b430 100644 --- a/core/test/base/composition.cpp +++ b/core/test/base/composition.cpp @@ -51,10 +51,10 @@ struct DummyOperator : public gko::EnableLinOp { : gko::EnableLinOp(exec, size) {} - void apply_impl(const LinOp *b, LinOp *x) const override {} + void apply_impl(const LinOp* b, LinOp* x) const override {} - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override {} }; diff --git a/core/test/base/dim.cpp b/core/test/base/dim.cpp index f78191967d9..dcb9309241b 100644 --- a/core/test/base/dim.cpp +++ b/core/test/base/dim.cpp @@ -82,15 +82,15 @@ TEST(Dim, ConstructsNullObject) class dim_manager { public: using dim = gko::dim<3>; - const dim &get_size() const { return size_; } + const dim& get_size() const { return size_; } - static std::unique_ptr create(const dim &size) + static std::unique_ptr create(const dim& 
size) { return std::unique_ptr{new dim_manager{size}}; } private: - dim_manager(const dim &size) : size_{size} {} + dim_manager(const dim& size) : size_{size} {} dim size_; }; diff --git a/core/test/base/exception_helpers.cpp b/core/test/base/exception_helpers.cpp index c9f961f807b..6710ab514c7 100644 --- a/core/test/base/exception_helpers.cpp +++ b/core/test/base/exception_helpers.cpp @@ -56,12 +56,12 @@ TEST(NotCompiled, ThrowsWhenUsed) template -void test_not_supported_impl(const T &obj) +void test_not_supported_impl(const T& obj) { try { GKO_NOT_SUPPORTED(obj); FAIL(); - } catch (gko::NotSupported &m) { + } catch (gko::NotSupported& m) { // check for equal suffix std::string msg{m.what()}; auto expected = gko::name_demangling::get_type_name(typeid(Expected)); @@ -87,7 +87,7 @@ struct Derived : Base {}; TEST(NotSupported, ReturnsPtrNotSupportedException) { Derived d; - Base *b = &d; + Base* b = &d; test_not_supported_impl(b); } @@ -95,7 +95,7 @@ TEST(NotSupported, ReturnsPtrNotSupportedException) TEST(NotSupported, ReturnsRefNotSupportedException) { Derived d; - Base &b = d; + Base& b = d; test_not_supported_impl(b); } diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 5f1ec600cd2..cda75a709bb 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -56,7 +56,7 @@ using exec_ptr = std::shared_ptr; class ExampleOperation : public gko::Operation { public: - explicit ExampleOperation(int &val) : value(val) {} + explicit ExampleOperation(int& val) : value(val) {} void run(std::shared_ptr) const override { value = 1; @@ -78,7 +78,7 @@ class ExampleOperation : public gko::Operation { value = 5; } - int &value; + int& value; }; @@ -112,7 +112,7 @@ TEST(OmpExecutor, AllocatesAndFreesMemory) { const int num_elems = 10; exec_ptr omp = gko::OmpExecutor::create(); - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_NO_THROW(ptr = omp->alloc(num_elems)); ASSERT_NO_THROW(omp->free(ptr)); @@ -130,7 +130,7 @@ TEST(OmpExecutor, 
FailsWhenOverallocating) { const gko::size_type num_elems = 1ll << 50; // 4PB of integers exec_ptr omp = gko::OmpExecutor::create(); - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_THROW(ptr = omp->alloc(num_elems), gko::AllocationError); @@ -143,7 +143,7 @@ TEST(OmpExecutor, CopiesData) int orig[] = {3, 8}; const int num_elems = std::extent::value; exec_ptr omp = gko::OmpExecutor::create(); - int *copy = omp->alloc(num_elems); + int* copy = omp->alloc(num_elems); // user code is run on the OMP, so local variables are in OMP memory omp->copy(num_elems, orig, copy); @@ -239,7 +239,7 @@ TEST(ReferenceExecutor, AllocatesAndFreesMemory) { const int num_elems = 10; exec_ptr ref = gko::ReferenceExecutor::create(); - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_NO_THROW(ptr = ref->alloc(num_elems)); ASSERT_NO_THROW(ref->free(ptr)); @@ -257,7 +257,7 @@ TEST(ReferenceExecutor, FailsWhenOverallocating) { const gko::size_type num_elems = 1ll << 50; // 4PB of integers exec_ptr ref = gko::ReferenceExecutor::create(); - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_THROW(ptr = ref->alloc(num_elems), gko::AllocationError); @@ -270,7 +270,7 @@ TEST(ReferenceExecutor, CopiesData) int orig[] = {3, 8}; const int num_elems = std::extent::value; exec_ptr ref = gko::ReferenceExecutor::create(); - int *copy = ref->alloc(num_elems); + int* copy = ref->alloc(num_elems); // ReferenceExecutor is a type of OMP executor, so this is O.K. 
ref->copy(num_elems, orig, copy); @@ -284,7 +284,7 @@ TEST(ReferenceExecutor, CopiesData) TEST(ReferenceExecutor, CopiesSingleValue) { exec_ptr ref = gko::ReferenceExecutor::create(); - int *el = ref->alloc(1); + int* el = ref->alloc(1); el[0] = 83683; EXPECT_EQ(83683, ref->copy_val_to_host(el)); @@ -299,7 +299,7 @@ TEST(ReferenceExecutor, CopiesDataFromOmp) const int num_elems = std::extent::value; exec_ptr omp = gko::OmpExecutor::create(); exec_ptr ref = gko::ReferenceExecutor::create(); - int *copy = ref->alloc(num_elems); + int* copy = ref->alloc(num_elems); // ReferenceExecutor is a type of OMP executor, so this is O.K. ref->copy_from(omp.get(), num_elems, orig, copy); @@ -316,7 +316,7 @@ TEST(ReferenceExecutor, CopiesDataToOmp) const int num_elems = std::extent::value; exec_ptr omp = gko::OmpExecutor::create(); exec_ptr ref = gko::ReferenceExecutor::create(); - int *copy = omp->alloc(num_elems); + int* copy = omp->alloc(num_elems); // ReferenceExecutor is a type of OMP executor, so this is O.K. omp->copy_from(ref.get(), num_elems, orig, copy); @@ -619,10 +619,10 @@ struct mock_free : T { * with `()` operator instead of `{}`. */ template - mock_free(Params &&... params) : T(std::forward(params)...) + mock_free(Params&&... params) : T(std::forward(params)...) 
{} - void raw_free(void *ptr) const noexcept override + void raw_free(void* ptr) const noexcept override { called_free = true; T::raw_free(ptr); diff --git a/core/test/base/extended_float.cpp b/core/test/base/extended_float.cpp index f1c38f8b983..5b16a717be3 100644 --- a/core/test/base/extended_float.cpp +++ b/core/test/base/extended_float.cpp @@ -77,14 +77,15 @@ class ExtendedFloatTestBase : public ::testing::Test { static floating create_from_bits(const char (&s)[N]) { auto bits = std::bitset(s).to_ullong(); - return reinterpret_cast &>(bits); + return reinterpret_cast&>(bits); } template static std::bitset get_bits(T val) { - auto bits = reinterpret_cast< - typename gko::detail::float_traits::bits_type &>(val); + auto bits = + reinterpret_cast::bits_type&>( + val); return std::bitset(bits); } diff --git a/core/test/base/iterator_factory.cpp b/core/test/base/iterator_factory.cpp index 340398d64c9..b7ed701ac1c 100644 --- a/core/test/base/iterator_factory.cpp +++ b/core/test/base/iterator_factory.cpp @@ -63,8 +63,8 @@ class IteratorFactory : public ::testing::Test { {} template - void check_vector_equal(const std::vector &v1, - const std::vector &v2) + void check_vector_equal(const std::vector& v1, + const std::vector& v2) { ASSERT_TRUE(std::equal(v1.begin(), v1.end(), v2.begin())); } diff --git a/core/test/base/lin_op.cpp b/core/test/base/lin_op.cpp index 1c72b536778..fb7a9b263e1 100644 --- a/core/test/base/lin_op.cpp +++ b/core/test/base/lin_op.cpp @@ -64,23 +64,23 @@ class DummyLinOp : public gko::EnableLinOp, mutable std::shared_ptr last_beta_access; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { this->access(); - static_cast(b)->access(); - static_cast(x)->access(); + static_cast(b)->access(); + static_cast(x)->access(); last_b_access = b->get_executor(); last_x_access = x->get_executor(); } - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - 
const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override { this->access(); - static_cast(alpha)->access(); - static_cast(b)->access(); - static_cast(beta)->access(); - static_cast(x)->access(); + static_cast(alpha)->access(); + static_cast(b)->access(); + static_cast(beta)->access(); + static_cast(x)->access(); last_alpha_access = alpha->get_executor(); last_b_access = b->get_executor(); last_beta_access = beta->get_executor(); @@ -266,7 +266,7 @@ class DummyLinOpWithFactory GKO_ENABLE_LIN_OP_FACTORY(DummyLinOpWithFactory, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); - DummyLinOpWithFactory(const Factory *factory, + DummyLinOpWithFactory(const Factory* factory, std::shared_ptr op) : gko::EnableLinOp(factory->get_executor()), parameters_{factory->get_parameters()}, @@ -276,10 +276,10 @@ class DummyLinOpWithFactory std::shared_ptr op_; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; @@ -351,10 +351,10 @@ class DummyLinOpWithType Type get_value() const { return value_; } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} private: diff --git a/core/test/base/matrix_data.cpp b/core/test/base/matrix_data.cpp index 23181ce77bd..78aff6225f9 100644 --- 
a/core/test/base/matrix_data.cpp +++ b/core/test/base/matrix_data.cpp @@ -87,7 +87,7 @@ TEST(MatrixData, InitializesWithRandomValues) ASSERT_EQ(m.size, gko::dim<2>(2, 3)); ASSERT_LE(m.nonzeros.size(), 6); - for (const auto &elem : m.nonzeros) { + for (const auto& elem : m.nonzeros) { EXPECT_TRUE(-1 <= elem.value && elem.value <= 1); } } @@ -288,7 +288,7 @@ TEST(MatrixData, InitializesBlockDiagonalMatrixFromBlockList) struct dummy_distribution { template - double operator()(RandomEngine &&) const + double operator()(RandomEngine&&) const { if (last >= 2.0) { last = 0.0; diff --git a/core/test/base/mtx_io.cpp b/core/test/base/mtx_io.cpp index 83da9f28b21..38e58acbd93 100644 --- a/core/test/base/mtx_io.cpp +++ b/core/test/base/mtx_io.cpp @@ -65,7 +65,7 @@ TEST(MtxReader, ReadsDenseDoubleRealMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 3.0)); ASSERT_EQ(v[2], tpl(0, 2, 2.0)); @@ -91,7 +91,7 @@ TEST(MtxReader, ReadsDenseDoubleRealMtxWith64Index) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 3.0)); ASSERT_EQ(v[2], tpl(0, 2, 2.0)); @@ -117,7 +117,7 @@ TEST(MtxReader, ReadsDenseFloatIntegerMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 3.0)); ASSERT_EQ(v[2], tpl(0, 2, 2.0)); @@ -143,7 +143,7 @@ TEST(MtxReader, ReadsDenseFloatIntegerMtxWith64Index) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 3.0)); ASSERT_EQ(v[2], tpl(0, 2, 2.0)); @@ -170,7 +170,7 @@ TEST(MtxReader, ReadsDenseComplexDoubleMtx) auto data = 
gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, cpx(1.0, 2.0))); ASSERT_EQ(v[1], tpl(0, 1, cpx(3.0, 1.0))); ASSERT_EQ(v[2], tpl(0, 2, cpx(2.0, 4.0))); @@ -197,7 +197,7 @@ TEST(MtxReader, ReadsDenseComplexDoubleMtxWith64Index) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, cpx(1.0, 2.0))); ASSERT_EQ(v[1], tpl(0, 1, cpx(3.0, 1.0))); ASSERT_EQ(v[2], tpl(0, 2, cpx(2.0, 4.0))); @@ -224,7 +224,7 @@ TEST(MtxReader, ReadsDenseComplexFloatMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, cpx(1.0, 2.0))); ASSERT_EQ(v[1], tpl(0, 1, cpx(3.0, 1.0))); ASSERT_EQ(v[2], tpl(0, 2, cpx(2.0, 4.0))); @@ -251,7 +251,7 @@ TEST(MtxReader, ReadsDenseComplexFloatMtxWith64Index) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, cpx(1.0, 2.0))); ASSERT_EQ(v[1], tpl(0, 1, cpx(3.0, 1.0))); ASSERT_EQ(v[2], tpl(0, 2, cpx(2.0, 4.0))); @@ -275,7 +275,7 @@ TEST(MtxReader, ReadsSparseRealMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 3.0)); ASSERT_EQ(v[2], tpl(0, 2, 2.0)); @@ -297,7 +297,7 @@ TEST(MtxReader, ReadsSparseRealSymetricMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(3, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 2.0)); ASSERT_EQ(v[2], tpl(0, 2, 3.0)); @@ -319,7 +319,7 @@ TEST(MtxReader, ReadsSparseRealSkewSymetricMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(3, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; 
ASSERT_EQ(v[0], tpl(0, 1, -2.0)); ASSERT_EQ(v[1], tpl(0, 2, -3.0)); ASSERT_EQ(v[2], tpl(1, 0, 2.0)); @@ -341,7 +341,7 @@ TEST(MtxReader, ReadsSparsePatternMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 1.0)); ASSERT_EQ(v[2], tpl(0, 2, 1.0)); @@ -364,7 +364,7 @@ TEST(MtxReader, ReadsSparseComplexMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, cpx(1.0, 2.0))); ASSERT_EQ(v[1], tpl(0, 1, cpx(3.0, 1.0))); ASSERT_EQ(v[2], tpl(0, 2, cpx(2.0, 4.0))); @@ -385,7 +385,7 @@ TEST(MtxReader, ReadsSparseComplexHermitianMtx) auto data = gko::read_raw(iss); ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - auto &v = data.nonzeros; + auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 1, cpx(3.0, 1.0))); ASSERT_EQ(v[1], tpl(0, 2, cpx(2.0, 4.0))); ASSERT_EQ(v[2], tpl(1, 0, cpx(3.0, -1.0))); @@ -611,15 +611,15 @@ class DummyLinOp using index_type = IndexType; using mat_data = gko::matrix_data; - void read(const mat_data &data) override { data_ = data; } + void read(const mat_data& data) override { data_ = data; } - void write(mat_data &data) const override { data = data_; } + void write(mat_data& data) const override { data = data_; } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} explicit DummyLinOp(std::shared_ptr exec) @@ -661,9 +661,9 @@ TYPED_TEST(RealDummyLinOpTest, ReadsLinOpFromStream) auto lin_op = gko::read>( iss, gko::ReferenceExecutor::create()); - const auto &data = lin_op->data_; + const 
auto& data = lin_op->data_; ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - const auto &v = data.nonzeros; + const auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, 1.0)); ASSERT_EQ(v[1], tpl(0, 1, 3.0)); ASSERT_EQ(v[2], tpl(0, 2, 2.0)); @@ -734,9 +734,9 @@ TYPED_TEST(ComplexDummyLinOpTest, ReadsLinOpFromStream) auto lin_op = gko::read>( iss, gko::ReferenceExecutor::create()); - const auto &data = lin_op->data_; + const auto& data = lin_op->data_; ASSERT_EQ(data.size, gko::dim<2>(2, 3)); - const auto &v = data.nonzeros; + const auto& v = data.nonzeros; ASSERT_EQ(v[0], tpl(0, 0, value_type{1.0, 2.0})); ASSERT_EQ(v[1], tpl(0, 1, value_type{3.0, 4.0})); ASSERT_EQ(v[2], tpl(0, 2, value_type{2.0, 3.0})); diff --git a/core/test/base/perturbation.cpp b/core/test/base/perturbation.cpp index c9a6f1a1440..4f8a2b78f87 100644 --- a/core/test/base/perturbation.cpp +++ b/core/test/base/perturbation.cpp @@ -48,10 +48,10 @@ struct DummyOperator : public gko::EnableLinOp { : gko::EnableLinOp(exec, size) {} - void apply_impl(const LinOp *b, LinOp *x) const override {} + void apply_impl(const LinOp* b, LinOp* x) const override {} - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override {} }; @@ -64,10 +64,10 @@ struct TransposableDummyOperator : gko::EnableLinOp(exec, size) {} - void apply_impl(const LinOp *b, LinOp *x) const override {} + void apply_impl(const LinOp* b, LinOp* x) const override {} - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override {} std::unique_ptr transpose() const override diff --git a/core/test/base/polymorphic_object.cpp b/core/test/base/polymorphic_object.cpp index 62c4445d7fb..01220a48f64 100644 --- a/core/test/base/polymorphic_object.cpp +++ 
b/core/test/base/polymorphic_object.cpp @@ -160,9 +160,9 @@ struct ConvertibleToDummyObject x{v} {} - void convert_to(DummyObject *obj) const override { obj->x = x; } + void convert_to(DummyObject* obj) const override { obj->x = x; } - void move_to(DummyObject *obj) override { obj->x = x; } + void move_to(DummyObject* obj) override { obj->x = x; } int x; }; diff --git a/core/test/base/range.cpp b/core/test/base/range.cpp index c15c9c412da..dd77febbf45 100644 --- a/core/test/base/range.cpp +++ b/core/test/base/range.cpp @@ -155,7 +155,7 @@ struct dummy_accessor { int operator()(int a, int b, int c) const { return x * a + y * b + c; } - void copy_from(const dummy_accessor &other) const + void copy_from(const dummy_accessor& other) const { x = other.x; y = other.y; diff --git a/core/test/base/sanitizers.cpp b/core/test/base/sanitizers.cpp index e3b7a23b628..25f00e81c73 100644 --- a/core/test/base/sanitizers.cpp +++ b/core/test/base/sanitizers.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
TEST(Sanitizers, UseAfterFree) { - char *x = new char[50]; + char* x = new char[50]; x[0] = 'H'; x[1] = 'I'; x[2] = '\n'; @@ -53,7 +53,7 @@ TEST(Sanitizers, UseAfterFree) TEST(Sanitizers, MemoryLeak) { - char *x = new char[50]; + char* x = new char[50]; x[0] = 'H'; x[1] = 'I'; x[2] = '\n'; @@ -70,7 +70,7 @@ TEST(Sanitizers, UndefinedBehavior) int Global = 0; -void *Thread(void *x) +void* Thread(void* x) { Global = 42; return x; diff --git a/core/test/base/utils.cpp b/core/test/base/utils.cpp index 6d8985e1dff..19cc8b14e78 100644 --- a/core/test/base/utils.cpp +++ b/core/test/base/utils.cpp @@ -212,7 +212,7 @@ TEST(Lend, LendsUniquePointer) auto lent = gko::lend(p); - ::testing::StaticAssertTypeEq(); + ::testing::StaticAssertTypeEq(); ASSERT_EQ(p.get(), lent); } @@ -223,7 +223,7 @@ TEST(Lend, LendsSharedPointer) auto lent = gko::lend(p); - ::testing::StaticAssertTypeEq(); + ::testing::StaticAssertTypeEq(); ASSERT_EQ(p.get(), lent); } @@ -234,7 +234,7 @@ TEST(Lend, LendsPlainPointer) auto lent = gko::lend(p.get()); - ::testing::StaticAssertTypeEq(); + ::testing::StaticAssertTypeEq(); ASSERT_EQ(p.get(), lent); } @@ -243,7 +243,7 @@ TEST(As, ConvertsPolymorphicType) { Derived d; - Base *b = &d; + Base* b = &d; ASSERT_EQ(gko::as(b), &d); } @@ -252,12 +252,12 @@ TEST(As, ConvertsPolymorphicType) TEST(As, FailsToConvertIfNotRelated) { Derived d; - Base *b = &d; + Base* b = &d; try { gko::as(b); FAIL(); - } catch (gko::NotSupported &m) { + } catch (gko::NotSupported& m) { std::string msg{m.what()}; auto expected = gko::name_demangling::get_type_name(typeid(Derived)); ASSERT_TRUE( @@ -269,7 +269,7 @@ TEST(As, FailsToConvertIfNotRelated) TEST(As, ConvertsConstantPolymorphicType) { Derived d; - const Base *b = &d; + const Base* b = &d; ASSERT_EQ(gko::as(b), &d); } @@ -278,12 +278,12 @@ TEST(As, ConvertsConstantPolymorphicType) TEST(As, FailsToConvertConstantIfNotRelated) { Derived d; - const Base *b = &d; + const Base* b = &d; try { gko::as(b); FAIL(); - } catch 
(gko::NotSupported &m) { + } catch (gko::NotSupported& m) { std::string msg{m.what()}; auto expected = gko::name_demangling::get_type_name(typeid(Derived)); ASSERT_TRUE( @@ -452,7 +452,7 @@ TEST_F(TemporaryClone, DoesntCopyBackConstAfterLeavingScope) { { auto clone = make_temporary_clone( - omp, static_cast(gko::lend(obj))); + omp, static_cast(gko::lend(obj))); obj->data = 7; } diff --git a/core/test/log/logger.cpp b/core/test/log/logger.cpp index d9b1cd11f4e..563ea0ded12 100644 --- a/core/test/log/logger.cpp +++ b/core/test/log/logger.cpp @@ -141,14 +141,14 @@ struct DummyLogger : gko::log::Logger { explicit DummyLogger( std::shared_ptr exec, - const mask_type &enabled_events = Logger::all_events_mask) + const mask_type& enabled_events = Logger::all_events_mask) : Logger(exec, enabled_events) {} void on_iteration_complete( - const gko::LinOp *solver, const gko::size_type &num_iterations, - const gko::LinOp *residual, const gko::LinOp *solution = nullptr, - const gko::LinOp *residual_norm = nullptr) const override + const gko::LinOp* solver, const gko::size_type& num_iterations, + const gko::LinOp* residual, const gko::LinOp* solution = nullptr, + const gko::LinOp* residual_norm = nullptr) const override { this->num_iterations_ = num_iterations; } diff --git a/core/test/log/papi.cpp b/core/test/log/papi.cpp index c5fb0d98f95..290da717047 100644 --- a/core/test/log/papi.cpp +++ b/core/test/log/papi.cpp @@ -74,8 +74,8 @@ class Papi : public ::testing::Test { void TearDown() { eventset = PAPI_NULL; } template - const std::string init(const gko::log::Logger::mask_type &event, - const std::string &event_name, U *ptr) + const std::string init(const gko::log::Logger::mask_type& event, + const std::string& event_name, U* ptr) { logger = gko::log::Papi::create(exec, event); std::ostringstream os; @@ -84,7 +84,7 @@ class Papi : public ::testing::Test { return os.str(); } - void add_event(const std::string &event_name) + void add_event(const std::string& event_name) { int 
code; int ret_val = PAPI_event_name_to_code(event_name.c_str(), &code); @@ -106,7 +106,7 @@ class Papi : public ::testing::Test { } } - void stop(long long int *values) + void stop(long long int* values) { int ret_val = PAPI_stop(eventset, values); if (PAPI_OK != ret_val) { diff --git a/core/test/log/record.cpp b/core/test/log/record.cpp index a9a0947d199..af4ccf62809 100644 --- a/core/test/log/record.cpp +++ b/core/test/log/record.cpp @@ -70,7 +70,7 @@ TEST(Record, CatchesAllocationStarted) logger->on(exec.get(), 42); - auto &data = logger->get().allocation_started.back(); + auto& data = logger->get().allocation_started.back(); ASSERT_EQ(data->exec, exec.get()); ASSERT_EQ(data->num_bytes, 42); ASSERT_EQ(data->location, 0); @@ -87,7 +87,7 @@ TEST(Record, CatchesAllocationCompleted) logger->on(exec.get(), 42, ptr); - auto &data = logger->get().allocation_completed.back(); + auto& data = logger->get().allocation_completed.back(); ASSERT_EQ(data->exec, exec.get()); ASSERT_EQ(data->num_bytes, 42); ASSERT_EQ(data->location, ptr); @@ -104,7 +104,7 @@ TEST(Record, CatchesFreeStarted) logger->on(exec.get(), ptr); - auto &data = logger->get().free_started.back(); + auto& data = logger->get().free_started.back(); ASSERT_EQ(data->exec, exec.get()); ASSERT_EQ(data->num_bytes, 0); ASSERT_EQ(data->location, ptr); @@ -121,7 +121,7 @@ TEST(Record, CatchesFreeCompleted) logger->on(exec.get(), ptr); - auto &data = logger->get().free_completed.back(); + auto& data = logger->get().free_completed.back(); ASSERT_EQ(data->exec, exec.get()); ASSERT_EQ(data->num_bytes, 0); ASSERT_EQ(data->location, ptr); @@ -141,7 +141,7 @@ TEST(Record, CatchesCopyStarted) logger->on(exec.get(), exec.get(), ptr_from, ptr_to, 42); - auto &data = logger->get().copy_started.back(); + auto& data = logger->get().copy_started.back(); auto data_from = std::get<0>(*data); auto data_to = std::get<1>(*data); ASSERT_EQ(data_from.exec, exec.get()); @@ -166,7 +166,7 @@ TEST(Record, CatchesCopyCompleted) 
logger->on(exec.get(), exec.get(), ptr_from, ptr_to, 42); - auto &data = logger->get().copy_completed.back(); + auto& data = logger->get().copy_completed.back(); auto data_from = std::get<0>(*data); auto data_to = std::get<1>(*data); ASSERT_EQ(data_from.exec, exec.get()); @@ -187,7 +187,7 @@ TEST(Record, CatchesOperationLaunched) logger->on(exec.get(), &op); - auto &data = logger->get().operation_launched.back(); + auto& data = logger->get().operation_launched.back(); ASSERT_EQ(data->exec, exec.get()); ASSERT_EQ(data->operation, &op); } @@ -202,7 +202,7 @@ TEST(Record, CatchesOperationCompleted) logger->on(exec.get(), &op); - auto &data = logger->get().operation_completed.back(); + auto& data = logger->get().operation_completed.back(); ASSERT_EQ(data->exec, exec.get()); ASSERT_EQ(data->operation, &op); } @@ -220,7 +220,7 @@ TEST(Record, CatchesPolymorphicObjectCreateStarted) po.get()); - auto &data = logger->get().polymorphic_object_create_started.back(); + auto& data = logger->get().polymorphic_object_create_started.back(); ASSERT_EQ(data->exec, exec.get()); GKO_ASSERT_MTX_NEAR(gko::as(data->input.get()), po.get(), 0); ASSERT_EQ(data->output.get(), nullptr); @@ -239,7 +239,7 @@ TEST(Record, CatchesPolymorphicObjectCreateCompleted) logger->on( exec.get(), po.get(), output.get()); - auto &data = logger->get().polymorphic_object_create_completed.back(); + auto& data = logger->get().polymorphic_object_create_completed.back(); ASSERT_EQ(data->exec, exec.get()); GKO_ASSERT_MTX_NEAR(gko::as(data->input.get()), po.get(), 0); GKO_ASSERT_MTX_NEAR(gko::as(data->output.get()), output.get(), 0); @@ -258,7 +258,7 @@ TEST(Record, CatchesPolymorphicObjectCopyStarted) logger->on( exec.get(), from.get(), to.get()); - auto &data = logger->get().polymorphic_object_copy_started.back(); + auto& data = logger->get().polymorphic_object_copy_started.back(); ASSERT_EQ(data->exec, exec.get()); GKO_ASSERT_MTX_NEAR(gko::as(data->input.get()), from.get(), 0); 
GKO_ASSERT_MTX_NEAR(gko::as(data->output.get()), to.get(), 0); @@ -278,7 +278,7 @@ TEST(Record, CatchesPolymorphicObjectCopyCompleted) exec.get(), from.get(), to.get()); - auto &data = logger->get().polymorphic_object_copy_completed.back(); + auto& data = logger->get().polymorphic_object_copy_completed.back(); ASSERT_EQ(data->exec, exec.get()); GKO_ASSERT_MTX_NEAR(gko::as(data->input.get()), from.get(), 0); GKO_ASSERT_MTX_NEAR(gko::as(data->output.get()), to.get(), 0); @@ -297,7 +297,7 @@ TEST(Record, CatchesPolymorphicObjectDeleted) po.get()); - auto &data = logger->get().polymorphic_object_deleted.back(); + auto& data = logger->get().polymorphic_object_deleted.back(); ASSERT_EQ(data->exec, exec.get()); GKO_ASSERT_MTX_NEAR(gko::as(data->input.get()), po.get(), 0); ASSERT_EQ(data->output, nullptr); @@ -317,7 +317,7 @@ TEST(Record, CatchesLinOpApplyStarted) logger->on(A.get(), b.get(), x.get()); - auto &data = logger->get().linop_apply_started.back(); + auto& data = logger->get().linop_apply_started.back(); GKO_ASSERT_MTX_NEAR(gko::as(data->A.get()), A, 0); ASSERT_EQ(data->alpha, nullptr); GKO_ASSERT_MTX_NEAR(gko::as(data->b.get()), b, 0); @@ -339,7 +339,7 @@ TEST(Record, CatchesLinOpApplyCompleted) logger->on(A.get(), b.get(), x.get()); - auto &data = logger->get().linop_apply_completed.back(); + auto& data = logger->get().linop_apply_completed.back(); GKO_ASSERT_MTX_NEAR(gko::as(data->A.get()), A, 0); ASSERT_EQ(data->alpha, nullptr); GKO_ASSERT_MTX_NEAR(gko::as(data->b.get()), b, 0); @@ -363,7 +363,7 @@ TEST(Record, CatchesLinOpAdvancedApplyStarted) logger->on( A.get(), alpha.get(), b.get(), beta.get(), x.get()); - auto &data = logger->get().linop_advanced_apply_started.back(); + auto& data = logger->get().linop_advanced_apply_started.back(); GKO_ASSERT_MTX_NEAR(gko::as(data->A.get()), A, 0); GKO_ASSERT_MTX_NEAR(gko::as(data->alpha.get()), alpha, 0); GKO_ASSERT_MTX_NEAR(gko::as(data->b.get()), b, 0); @@ -387,7 +387,7 @@ TEST(Record, 
CatchesLinOpAdvancedApplyCompleted) logger->on( A.get(), alpha.get(), b.get(), beta.get(), x.get()); - auto &data = logger->get().linop_advanced_apply_completed.back(); + auto& data = logger->get().linop_advanced_apply_completed.back(); GKO_ASSERT_MTX_NEAR(gko::as(data->A.get()), A, 0); GKO_ASSERT_MTX_NEAR(gko::as(data->alpha.get()), alpha, 0); GKO_ASSERT_MTX_NEAR(gko::as(data->b.get()), b, 0); @@ -411,7 +411,7 @@ TEST(Record, CatchesLinopFactoryGenerateStarted) logger->on(factory.get(), input.get()); - auto &data = logger->get().linop_factory_generate_started.back(); + auto& data = logger->get().linop_factory_generate_started.back(); ASSERT_EQ(data->factory, factory.get()); ASSERT_NE(data->input.get(), nullptr); ASSERT_EQ(data->output.get(), nullptr); @@ -434,7 +434,7 @@ TEST(Record, CatchesLinopFactoryGenerateCompleted) logger->on( factory.get(), input.get(), output.get()); - auto &data = logger->get().linop_factory_generate_completed.back(); + auto& data = logger->get().linop_factory_generate_completed.back(); ASSERT_EQ(data->factory, factory.get()); ASSERT_NE(data->input.get(), nullptr); ASSERT_NE(data->output.get(), nullptr); @@ -455,7 +455,7 @@ TEST(Record, CatchesCriterionCheckStarted) criterion.get(), 1, nullptr, nullptr, nullptr, RelativeStoppingId, true); - auto &data = logger->get().criterion_check_started.back(); + auto& data = logger->get().criterion_check_started.back(); ASSERT_NE(data->criterion, nullptr); ASSERT_EQ(data->stopping_id, RelativeStoppingId); ASSERT_EQ(data->set_finalized, true); @@ -481,7 +481,7 @@ TEST(Record, CatchesCriterionCheckCompletedOld) stop_status.get_data()->reset(); stop_status.get_data()->stop(RelativeStoppingId); - auto &data = logger->get().criterion_check_completed.back(); + auto& data = logger->get().criterion_check_completed.back(); ASSERT_NE(data->criterion, nullptr); ASSERT_EQ(data->stopping_id, RelativeStoppingId); ASSERT_EQ(data->set_finalized, true); @@ -511,7 +511,7 @@ TEST(Record, CatchesCriterionCheckCompleted) 
stop_status.get_data()->reset(); stop_status.get_data()->stop(RelativeStoppingId); - auto &data = logger->get().criterion_check_completed.back(); + auto& data = logger->get().criterion_check_completed.back(); ASSERT_NE(data->criterion, nullptr); ASSERT_EQ(data->stopping_id, RelativeStoppingId); ASSERT_EQ(data->set_finalized, true); @@ -546,7 +546,7 @@ TEST(Record, CatchesIterations) solver.get(), num_iters, residual.get(), solution.get(), residual_norm.get(), implicit_sq_residual_norm.get()); - auto &data = logger->get().iteration_completed.back(); + auto& data = logger->get().iteration_completed.back(); ASSERT_NE(data->solver.get(), nullptr); ASSERT_EQ(data->num_iterations, num_iters); GKO_ASSERT_MTX_NEAR(gko::as(data->residual.get()), residual, 0); diff --git a/core/test/matrix/coo.cpp b/core/test/matrix/coo.cpp index 3500dc77edc..0047a336836 100644 --- a/core/test/matrix/coo.cpp +++ b/core/test/matrix/coo.cpp @@ -56,9 +56,9 @@ class Coo : public ::testing::Test { mtx(gko::matrix::Coo::create( exec, gko::dim<2>{2, 3}, 4)) { - value_type *v = mtx->get_values(); - index_type *c = mtx->get_col_idxs(); - index_type *r = mtx->get_row_idxs(); + value_type* v = mtx->get_values(); + index_type* c = mtx->get_col_idxs(); + index_type* r = mtx->get_row_idxs(); r[0] = 0; r[1] = 0; r[2] = 0; @@ -76,7 +76,7 @@ class Coo : public ::testing::Test { std::shared_ptr exec; std::unique_ptr mtx; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -97,7 +97,7 @@ class Coo : public ::testing::Test { EXPECT_EQ(v[3], value_type{5.0}); } - void assert_empty(const Mtx *m) + void assert_empty(const Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -183,7 +183,7 @@ TYPED_TEST(Coo, CanBeCloned) this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->get_values()[1] = 5.0; - 
this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); } diff --git a/core/test/matrix/csr.cpp b/core/test/matrix/csr.cpp index a9bcb92a7c8..96b16267184 100644 --- a/core/test/matrix/csr.cpp +++ b/core/test/matrix/csr.cpp @@ -57,10 +57,10 @@ class Csr : public ::testing::Test { exec, gko::dim<2>{2, 3}, 4, std::make_shared(2))) { - value_type *v = mtx->get_values(); - index_type *c = mtx->get_col_idxs(); - index_type *r = mtx->get_row_ptrs(); - index_type *s = mtx->get_srow(); + value_type* v = mtx->get_values(); + index_type* c = mtx->get_col_idxs(); + index_type* r = mtx->get_row_ptrs(); + index_type* s = mtx->get_srow(); r[0] = 0; r[1] = 3; r[2] = 4; @@ -78,7 +78,7 @@ class Csr : public ::testing::Test { std::shared_ptr exec; std::unique_ptr mtx; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -100,7 +100,7 @@ class Csr : public ::testing::Test { EXPECT_EQ(s[0], 0); } - void assert_empty(const Mtx *m) + void assert_empty(const Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -191,7 +191,7 @@ TYPED_TEST(Csr, CanBeCloned) this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->get_values()[1] = 5.0; - this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); } diff --git a/core/test/matrix/csr_builder.cpp b/core/test/matrix/csr_builder.cpp index d5a0187ce93..2fc2b42e98c 100644 --- a/core/test/matrix/csr_builder.cpp +++ b/core/test/matrix/csr_builder.cpp @@ -89,8 +89,8 @@ TYPED_TEST(CsrBuilder, UpdatesSrowOnDestruction) using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; struct mock_strategy : public Mtx::strategy_type { - virtual void process(const gko::Array &, - gko::Array *) override + 
virtual void process(const gko::Array&, + gko::Array*) override { *was_called = true; } @@ -102,9 +102,9 @@ TYPED_TEST(CsrBuilder, UpdatesSrowOnDestruction) return std::make_shared(*was_called); } - mock_strategy(bool &flag) : Mtx::strategy_type(""), was_called(&flag) {} + mock_strategy(bool& flag) : Mtx::strategy_type(""), was_called(&flag) {} - bool *was_called; + bool* was_called; }; bool was_called{}; this->mtx->set_strategy(std::make_shared(was_called)); diff --git a/core/test/matrix/dense.cpp b/core/test/matrix/dense.cpp index 77dd543ac68..561fc230ade 100644 --- a/core/test/matrix/dense.cpp +++ b/core/test/matrix/dense.cpp @@ -57,7 +57,7 @@ class Dense : public ::testing::Test { {} - static void assert_equal_to_original_mtx(gko::matrix::Dense *m) + static void assert_equal_to_original_mtx(gko::matrix::Dense* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); ASSERT_EQ(m->get_stride(), 4); @@ -70,7 +70,7 @@ class Dense : public ::testing::Test { ASSERT_EQ(m->at(1, 2), value_type{3.5}); } - static void assert_empty(gko::matrix::Dense *m) + static void assert_empty(gko::matrix::Dense* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_stored_elements(), 0); diff --git a/core/test/matrix/diagonal.cpp b/core/test/matrix/diagonal.cpp index 9e35a51c31e..eca2892fdec 100644 --- a/core/test/matrix/diagonal.cpp +++ b/core/test/matrix/diagonal.cpp @@ -52,7 +52,7 @@ class Diagonal : public ::testing::Test { : exec(gko::ReferenceExecutor::create()), diag(gko::matrix::Diagonal::create(exec, 3u)) { - value_type *v = diag->get_values(); + value_type* v = diag->get_values(); v[0] = 1.0; v[1] = 3.0; v[2] = 2.0; @@ -61,7 +61,7 @@ class Diagonal : public ::testing::Test { std::shared_ptr exec; std::unique_ptr diag; - void assert_equal_to_original_mtx(const Diag *m) + void assert_equal_to_original_mtx(const Diag* m) { auto v = m->get_const_values(); ASSERT_EQ(m->get_size(), gko::dim<2>(3, 3)); @@ -70,7 +70,7 @@ class Diagonal : public ::testing::Test { 
EXPECT_EQ(v[2], value_type{2.0}); } - void assert_empty(const Diag *m) + void assert_empty(const Diag* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_const_values(), nullptr); @@ -146,7 +146,7 @@ TYPED_TEST(Diagonal, CanBeCloned) this->assert_equal_to_original_mtx(this->diag.get()); this->diag->get_values()[1] = 5.0; - this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); } diff --git a/core/test/matrix/ell.cpp b/core/test/matrix/ell.cpp index f62e17e3958..c8e6eeeef4b 100644 --- a/core/test/matrix/ell.cpp +++ b/core/test/matrix/ell.cpp @@ -56,8 +56,8 @@ class Ell : public ::testing::Test { mtx(gko::matrix::Ell::create( exec, gko::dim<2>{2, 3}, 3)) { - value_type *v = mtx->get_values(); - index_type *c = mtx->get_col_idxs(); + value_type* v = mtx->get_values(); + index_type* c = mtx->get_col_idxs(); c[0] = 0; c[1] = 1; c[2] = 1; @@ -75,7 +75,7 @@ class Ell : public ::testing::Test { std::shared_ptr exec; std::unique_ptr mtx; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -99,7 +99,7 @@ class Ell : public ::testing::Test { EXPECT_EQ(v[5], value_type{0.0}); } - void assert_empty(const Mtx *m) + void assert_empty(const Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -185,7 +185,7 @@ TYPED_TEST(Ell, CanBeCloned) this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->get_values()[1] = 5.0; - this->assert_equal_to_original_mtx(static_cast(clone.get())); + this->assert_equal_to_original_mtx(static_cast(clone.get())); } diff --git a/core/test/matrix/fbcsr.cpp b/core/test/matrix/fbcsr.cpp index 80890e146e5..f026ff5b673 100644 --- a/core/test/matrix/fbcsr.cpp +++ b/core/test/matrix/fbcsr.cpp @@ -54,10 +54,10 @@ namespace { template void assert_matrices_are_same( - const gko::matrix::Fbcsr *const bm, 
- const gko::matrix::Csr *const cm, - const gko::matrix::Diagonal *const diam = nullptr, - const gko::matrix_data *const md = nullptr) + const gko::matrix::Fbcsr* const bm, + const gko::matrix::Csr* const cm, + const gko::matrix::Diagonal* const diam = nullptr, + const gko::matrix_data* const md = nullptr) { if (cm) { ASSERT_EQ(bm->get_size(), cm->get_size()); @@ -83,7 +83,7 @@ void assert_matrices_are_same( bm->get_const_values()); for (IndexType ibrow = 0; ibrow < nbrows; ibrow++) { - const IndexType *const browptr = bm->get_const_row_ptrs(); + const IndexType* const browptr = bm->get_const_row_ptrs(); const IndexType numblocksbrow = browptr[ibrow + 1] - browptr[ibrow]; for (IndexType irow = ibrow * bs; irow < ibrow * bs + bs; irow++) { const IndexType rowstart = browptr[ibrow] * bs * bs + @@ -94,7 +94,7 @@ void assert_matrices_are_same( } const IndexType iz_browstart = browptr[ibrow] * bs * bs; - const IndexType *const bcolinds = bm->get_const_col_idxs(); + const IndexType* const bcolinds = bm->get_const_col_idxs(); for (IndexType ibnz = browptr[ibrow]; ibnz < browptr[ibrow + 1]; ibnz++) { @@ -185,11 +185,11 @@ TYPED_TEST(FbcsrSample, SampleGeneratorsAreCorrect) check_sample_generator_common(fbsample); assert_matrices_are_same(fbmtx.get(), csmtx.get(), - static_cast(nullptr), &mdata); + static_cast(nullptr), &mdata); check_sample_generator_common(fbsample2); assert_matrices_are_same(fbmtx2.get(), csmtx2.get(), diag2.get()); for (index_type irow = 0; irow < fbsample2.nrows; irow++) { - const index_type *const row_ptrs = csmtx2->get_const_row_ptrs(); + const index_type* const row_ptrs = csmtx2->get_const_row_ptrs(); const index_type num_nnz_row = row_ptrs[irow + 1] - row_ptrs[irow]; ASSERT_EQ(nnzperrow.get_const_data()[irow], num_nnz_row); for (index_type iz = row_ptrs[irow]; iz < row_ptrs[irow + 1]; iz++) { @@ -258,9 +258,9 @@ class Fbcsr : public ::testing::Test { mtx(fbsample.generate_fbcsr()) { // backup for move tests - const value_type *const v = 
mtx->get_values(); - const index_type *const c = mtx->get_col_idxs(); - const index_type *const r = mtx->get_row_ptrs(); + const value_type* const v = mtx->get_values(); + const index_type* const c = mtx->get_col_idxs(); + const index_type* const r = mtx->get_row_ptrs(); orig_size = mtx->get_size(); orig_rowptrs.resize(fbsample.nbrows + 1); orig_colinds.resize(fbsample.nbnz); @@ -279,7 +279,7 @@ class Fbcsr : public ::testing::Test { std::vector orig_rowptrs; std::vector orig_colinds; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -294,7 +294,7 @@ class Fbcsr : public ::testing::Test { ASSERT_EQ(m->get_num_block_cols(), m->get_size()[1] / bs); for (index_type irow = 0; irow < orig_size[0] / bs; irow++) { - const index_type *const rowptr = &orig_rowptrs[0]; + const index_type* const rowptr = &orig_rowptrs[0]; ASSERT_EQ(r[irow], rowptr[irow]); for (index_type inz = rowptr[irow]; inz < rowptr[irow + 1]; inz++) { @@ -308,7 +308,7 @@ class Fbcsr : public ::testing::Test { } } - void assert_empty(const Mtx *m) + void assert_empty(const Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -388,9 +388,9 @@ TYPED_TEST(Fbcsr, CanBeCreatedFromExistingData) const size_type nbcols = this->fbsample.nbcols; const size_type bnnz = this->fbsample.nbnz; std::unique_ptr refmat = this->fbsample.generate_fbcsr(); - value_type *const values = refmat->get_values(); - index_type *const col_idxs = refmat->get_col_idxs(); - index_type *const row_ptrs = refmat->get_row_ptrs(); + value_type* const values = refmat->get_values(); + index_type* const col_idxs = refmat->get_col_idxs(); + index_type* const row_ptrs = refmat->get_row_ptrs(); auto mtx = gko::matrix::Fbcsr::create( this->exec, gko::dim<2>{nbrows * bs, nbcols * bs}, bs, @@ -436,7 +436,7 @@ TYPED_TEST(Fbcsr, CanBeCloned) 
this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->get_values()[1] = 5.0; - this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); } diff --git a/core/test/matrix/fbcsr_sample.hpp b/core/test/matrix/fbcsr_sample.hpp index 9e6ff969045..8efad5f33c8 100644 --- a/core/test/matrix/fbcsr_sample.hpp +++ b/core/test/matrix/fbcsr_sample.hpp @@ -96,9 +96,9 @@ class FbcsrSample { static_cast(ncols)}, nnz, bs); - value_type *const v = mtx->get_values(); - index_type *const c = mtx->get_col_idxs(); - index_type *const r = mtx->get_row_ptrs(); + value_type* const v = mtx->get_values(); + index_type* const c = mtx->get_col_idxs(); + index_type* const r = mtx->get_row_ptrs(); r[0] = 0; r[1] = 2; r[2] = 4; @@ -118,7 +118,7 @@ class FbcsrSample { "block size does not divide the size!"); for (index_type ibrow = 0; ibrow < mtx->get_num_block_rows(); ibrow++) { - const index_type *const browptr = mtx->get_row_ptrs(); + const index_type* const browptr = mtx->get_row_ptrs(); for (index_type inz = browptr[ibrow]; inz < browptr[ibrow + 1]; inz++) { const index_type bcolind = mtx->get_col_idxs()[inz]; @@ -299,7 +299,7 @@ class FbcsrSample2 { gko::Array r(exec, {0, 1, 3, 4}); gko::Array c(exec, {0, 0, 3, 2}); gko::Array vals(exec, nnz); - value_type *const v = vals.get_data(); + value_type* const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; v[0] = 1; @@ -325,7 +325,7 @@ class FbcsrSample2 { gko::Array c( exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); gko::Array vals(exec, nnz); - value_type *const v = vals.get_data(); + value_type* const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; v[0] = 1; v[1] = 2; @@ -398,7 +398,7 @@ class FbcsrSampleSquare { gko::Array c(exec, {1, 1}); gko::Array r(exec, {0, 1, 2}); gko::Array vals(exec, nnz); - value_type *const v = vals.get_data(); + value_type* const v = 
vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = i; return Fbcsr::create(exec, @@ -444,7 +444,7 @@ class FbcsrSampleComplex { gko::Array r(exec, {0, 1, 3, 4}); gko::Array c(exec, {0, 0, 3, 2}); gko::Array vals(exec, nnz); - value_type *const v = vals.get_data(); + value_type* const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; using namespace std::complex_literals; @@ -471,7 +471,7 @@ class FbcsrSampleComplex { gko::Array c( exec, {0, 1, 0, 1, 0, 1, 6, 7, 0, 1, 6, 7, 4, 5, 4, 5}); gko::Array vals(exec, nnz); - value_type *const v = vals.get_data(); + value_type* const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) v[i] = 0.15 + fbcsr_test_offset; using namespace std::complex_literals; @@ -527,7 +527,7 @@ class FbcsrSampleUnsorted { exec, {0, 1, 20, 15, 12, 18, 5, 28, 3, 10, 29, 5, 9, 2, 16, 12, 21, 2, 0, 1, 5, 9, 12, 15, 17, 20, 22, 24, 27, 28}); gko::Array vals(exec, nnz); - value_type *const v = vals.get_data(); + value_type* const v = vals.get_data(); for (IndexType i = 0; i < nnz; i++) { v[i] = static_cast(i + 0.15 + fbcsr_test_offset); } diff --git a/core/test/matrix/hybrid.cpp b/core/test/matrix/hybrid.cpp index 57bf3e24c47..d83bac57033 100644 --- a/core/test/matrix/hybrid.cpp +++ b/core/test/matrix/hybrid.cpp @@ -71,8 +71,8 @@ class Hybrid : public ::testing::Test { mtx(gko::matrix::Hybrid::create( exec, gko::dim<2>{2, 3}, 2, 2, 1)) { - value_type *v = mtx->get_ell_values(); - index_type *c = mtx->get_ell_col_idxs(); + value_type* v = mtx->get_ell_values(); + index_type* c = mtx->get_ell_col_idxs(); c[0] = 0; c[1] = 1; c[2] = 1; @@ -89,7 +89,7 @@ class Hybrid : public ::testing::Test { std::shared_ptr exec; std::unique_ptr mtx; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto v = m->get_const_ell_values(); auto c = m->get_const_ell_col_idxs(); @@ -113,7 +113,7 @@ class Hybrid : public ::testing::Test { 
EXPECT_EQ(m->get_const_coo_row_idxs()[0], 0); } - void assert_empty(const Mtx *m) + void assert_empty(const Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_ell_num_stored_elements(), 0); @@ -186,7 +186,7 @@ TYPED_TEST(Hybrid, CanBeCloned) this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->get_ell_values()[1] = 5.0; - this->assert_equal_to_original_mtx(static_cast(clone.get())); + this->assert_equal_to_original_mtx(static_cast(clone.get())); } diff --git a/core/test/matrix/permutation.cpp b/core/test/matrix/permutation.cpp index c5c5fe81db1..ad7e186e76f 100644 --- a/core/test/matrix/permutation.cpp +++ b/core/test/matrix/permutation.cpp @@ -65,7 +65,7 @@ class Permutation : public ::testing::Test { static void assert_equal_to_original_mtx( - gko::matrix::Permutation *m) + gko::matrix::Permutation* m) { auto perm = m->get_permutation(); ASSERT_EQ(m->get_size(), gko::dim<2>(4, 3)); @@ -76,7 +76,7 @@ class Permutation : public ::testing::Test { ASSERT_EQ(perm[3], 3); } - static void assert_empty(gko::matrix::Permutation *m) + static void assert_empty(gko::matrix::Permutation* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_permutation_size(), 0); diff --git a/core/test/matrix/sellp.cpp b/core/test/matrix/sellp.cpp index 65e3d532b20..905cac5c304 100644 --- a/core/test/matrix/sellp.cpp +++ b/core/test/matrix/sellp.cpp @@ -63,7 +63,7 @@ class Sellp : public ::testing::Test { std::shared_ptr exec; std::unique_ptr mtx; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -95,7 +95,7 @@ class Sellp : public ::testing::Test { } void assert_equal_to_original_mtx_with_slice_size_and_stride_factor( - const Mtx *m) + const Mtx* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -126,7 +126,7 @@ class Sellp : public ::testing::Test { EXPECT_EQ(v[5], value_type{0.0}); } - void 
assert_empty(const Mtx *m) + void assert_empty(const Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -211,7 +211,7 @@ TYPED_TEST(Sellp, CanBeCloned) this->assert_equal_to_original_mtx(this->mtx.get()); this->mtx->get_values()[1] = 5.0; - this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); } diff --git a/core/test/matrix/sparsity_csr.cpp b/core/test/matrix/sparsity_csr.cpp index 4faf44dbccd..a815415c377 100644 --- a/core/test/matrix/sparsity_csr.cpp +++ b/core/test/matrix/sparsity_csr.cpp @@ -63,8 +63,8 @@ class SparsityCsr : public ::testing::Test { mtx(gko::matrix::SparsityCsr::create( exec, gko::dim<2>{2, 3}, 4)) { - index_type *c = mtx->get_col_idxs(); - index_type *r = mtx->get_row_ptrs(); + index_type* c = mtx->get_col_idxs(); + index_type* r = mtx->get_row_ptrs(); r[0] = 0; r[1] = 3; r[2] = 4; @@ -77,7 +77,7 @@ class SparsityCsr : public ::testing::Test { std::shared_ptr exec; std::unique_ptr mtx; - void assert_equal_to_original_mtx(const Mtx *m) + void assert_equal_to_original_mtx(const Mtx* m) { auto c = m->get_const_col_idxs(); auto r = m->get_const_row_ptrs(); @@ -94,7 +94,7 @@ class SparsityCsr : public ::testing::Test { EXPECT_EQ(v[0], value_type{1.0}); } - void assert_empty(Mtx *m) + void assert_empty(Mtx* m) { ASSERT_EQ(m->get_size(), gko::dim<2>(0, 0)); ASSERT_EQ(m->get_num_nonzeros(), 0); @@ -194,7 +194,7 @@ TYPED_TEST(SparsityCsr, CanBeCloned) auto clone = this->mtx->clone(); this->assert_equal_to_original_mtx(this->mtx.get()); - this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); + this->assert_equal_to_original_mtx(dynamic_cast(clone.get())); } diff --git a/core/test/preconditioner/isai.cpp b/core/test/preconditioner/isai.cpp index fc8f6cee1dd..98a69a05965 100644 --- a/core/test/preconditioner/isai.cpp +++ b/core/test/preconditioner/isai.cpp @@ -57,10 +57,10 @@ struct DummyOperator : public gko::EnableLinOp, 
: gko::EnableLinOp(exec, size) {} - void apply_impl(const LinOp *b, LinOp *x) const override {} + void apply_impl(const LinOp* b, LinOp* x) const override {} - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override {} }; diff --git a/core/test/solver/bicg.cpp b/core/test/solver/bicg.cpp index f29620ad2e7..c73cefbbeb2 100644 --- a/core/test/solver/bicg.cpp +++ b/core/test/solver/bicg.cpp @@ -79,7 +79,7 @@ class Bicg : public ::testing::Test { std::unique_ptr bicg_factory; std::unique_ptr solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -105,7 +105,7 @@ TYPED_TEST(Bicg, BicgFactoryCreatesCorrectSolver) using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto bicg_solver = static_cast(this->solver.get()); + auto bicg_solver = static_cast(this->solver.get()); ASSERT_NE(bicg_solver->get_system_matrix(), nullptr); ASSERT_EQ(bicg_solver->get_system_matrix(), this->mtx); } @@ -120,8 +120,8 @@ TYPED_TEST(Bicg, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -135,8 +135,8 @@ TYPED_TEST(Bicg, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + 
this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -148,8 +148,8 @@ TYPED_TEST(Bicg, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -161,7 +161,7 @@ TYPED_TEST(Bicg, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -193,8 +193,8 @@ TYPED_TEST(Bicg, CanSetPreconditionerGenerator) .on(this->exec)) .on(this->exec); auto solver = bicg_factory->generate(this->mtx); - auto precond = dynamic_cast *>( - static_cast *>(solver.get()) + auto precond = dynamic_cast*>( + static_cast*>(solver.get()) ->get_preconditioner() .get()); @@ -244,7 +244,7 @@ TYPED_TEST(Bicg, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/bicgstab.cpp b/core/test/solver/bicgstab.cpp index 9ad4e883b5a..e63f2852f8d 100644 --- a/core/test/solver/bicgstab.cpp +++ b/core/test/solver/bicgstab.cpp @@ -77,7 +77,7 @@ class Bicgstab : public ::testing::Test { std::unique_ptr bicgstab_factory; std::unique_ptr solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -102,7 +102,7 @@ TYPED_TEST(Bicgstab, BicgstabFactoryCreatesCorrectSolver) { using Solver = typename TestFixture::Solver; 
ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto bicgstab_solver = static_cast(this->solver.get()); + auto bicgstab_solver = static_cast(this->solver.get()); ASSERT_NE(bicgstab_solver->get_system_matrix(), nullptr); ASSERT_EQ(bicgstab_solver->get_system_matrix(), this->mtx); } @@ -117,8 +117,8 @@ TYPED_TEST(Bicgstab, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -132,8 +132,8 @@ TYPED_TEST(Bicgstab, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -145,8 +145,8 @@ TYPED_TEST(Bicgstab, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -158,7 +158,7 @@ TYPED_TEST(Bicgstab, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -186,7 +186,7 @@ TYPED_TEST(Bicgstab, CanSetPreconditionerGenerator) .on(this->exec); auto solver = bicgstab_factory->generate(this->mtx); - auto precond = dynamic_cast *>( + auto precond = dynamic_cast*>( 
gko::lend(solver->get_preconditioner())); ASSERT_NE(precond, nullptr); @@ -212,7 +212,7 @@ TYPED_TEST(Bicgstab, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/cb_gmres.cpp b/core/test/solver/cb_gmres.cpp index 8cf060190f6..9a06ae6fa2d 100644 --- a/core/test/solver/cb_gmres.cpp +++ b/core/test/solver/cb_gmres.cpp @@ -100,7 +100,7 @@ class CbGmres : public ::testing::Test { std::unique_ptr cb_gmres_big_factory; std::unique_ptr big_solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -157,7 +157,7 @@ TYPED_TEST(CbGmres, CbGmresFactoryCreatesCorrectSolver) using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto cb_gmres_solver = static_cast(this->solver.get()); + auto cb_gmres_solver = static_cast(this->solver.get()); ASSERT_NE(cb_gmres_solver->get_system_matrix(), nullptr); ASSERT_EQ(cb_gmres_solver->get_system_matrix(), this->mtx); ASSERT_EQ(cb_gmres_solver->get_krylov_dim(), 100u); @@ -171,13 +171,13 @@ TYPED_TEST(CbGmres, CanBeCopied) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; auto copy = this->cb_gmres_factory->generate(Mtx::create(this->exec)); - auto r_copy = static_cast(copy.get()); + auto r_copy = static_cast(copy.get()); copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); auto copy_mtx = r_copy->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); ASSERT_EQ(r_copy->get_storage_precision(), this->solver->get_storage_precision()); @@ 
-190,13 +190,13 @@ TYPED_TEST(CbGmres, CanBeMoved) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; auto copy = this->cb_gmres_factory->generate(Mtx::create(this->exec)); - auto r_copy = static_cast(copy.get()); + auto r_copy = static_cast(copy.get()); copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); auto copy_mtx = r_copy->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); ASSERT_EQ(r_copy->get_storage_precision(), this->storage_precision); ASSERT_EQ(r_copy->get_krylov_dim(), 100u); @@ -208,11 +208,11 @@ TYPED_TEST(CbGmres, CanBeCloned) using Mtx = typename TestFixture::Mtx; using Solver = typename TestFixture::Solver; auto clone = this->solver->clone(); - auto r_clone = static_cast(clone.get()); + auto r_clone = static_cast(clone.get()); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); auto clone_mtx = r_clone->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); ASSERT_EQ(r_clone->get_storage_precision(), this->solver->get_storage_precision()); @@ -227,7 +227,7 @@ TYPED_TEST(CbGmres, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -253,7 +253,7 @@ TYPED_TEST(CbGmres, CanSetPreconditionerGenerator) .on(this->exec); auto solver = cb_gmres_factory->generate(this->mtx); auto precond = - static_cast(solver.get()->get_preconditioner().get()); + static_cast(solver.get()->get_preconditioner().get()); ASSERT_NE(precond, nullptr); ASSERT_EQ(precond->get_size(), gko::dim<2>(3, 3)); diff --git a/core/test/solver/cg.cpp b/core/test/solver/cg.cpp index d4314788d7b..81fad17176d 100644 --- 
a/core/test/solver/cg.cpp +++ b/core/test/solver/cg.cpp @@ -79,7 +79,7 @@ class Cg : public ::testing::Test { std::unique_ptr cg_factory; std::unique_ptr solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -105,7 +105,7 @@ TYPED_TEST(Cg, CgFactoryCreatesCorrectSolver) using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto cg_solver = static_cast(this->solver.get()); + auto cg_solver = static_cast(this->solver.get()); ASSERT_NE(cg_solver->get_system_matrix(), nullptr); ASSERT_EQ(cg_solver->get_system_matrix(), this->mtx); } @@ -120,8 +120,8 @@ TYPED_TEST(Cg, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -135,8 +135,8 @@ TYPED_TEST(Cg, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -148,8 +148,8 @@ TYPED_TEST(Cg, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -161,7 +161,7 @@ 
TYPED_TEST(Cg, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -192,8 +192,8 @@ TYPED_TEST(Cg, CanSetPreconditionerGenerator) .on(this->exec)) .on(this->exec); auto solver = cg_factory->generate(this->mtx); - auto precond = dynamic_cast *>( - static_cast *>(solver.get()) + auto precond = dynamic_cast*>( + static_cast*>(solver.get()) ->get_preconditioner() .get()); @@ -243,7 +243,7 @@ TYPED_TEST(Cg, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/cgs.cpp b/core/test/solver/cgs.cpp index a9bcc316346..225ad434a77 100644 --- a/core/test/solver/cgs.cpp +++ b/core/test/solver/cgs.cpp @@ -79,7 +79,7 @@ class Cgs : public ::testing::Test { std::unique_ptr cgs_factory; std::unique_ptr solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -104,7 +104,7 @@ TYPED_TEST(Cgs, CgsFactoryCreatesCorrectSolver) { using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto cgs_solver = static_cast(this->solver.get()); + auto cgs_solver = static_cast(this->solver.get()); ASSERT_NE(cgs_solver->get_system_matrix(), nullptr); ASSERT_EQ(cgs_solver->get_system_matrix(), this->mtx); } @@ -119,8 +119,8 @@ TYPED_TEST(Cgs, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = 
static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -134,8 +134,8 @@ TYPED_TEST(Cgs, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -147,8 +147,8 @@ TYPED_TEST(Cgs, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -160,7 +160,7 @@ TYPED_TEST(Cgs, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -192,8 +192,8 @@ TYPED_TEST(Cgs, CanSetPreconditionerGenerator) .on(this->exec)) .on(this->exec); auto solver = cgs_factory->generate(this->mtx); - auto precond = dynamic_cast *>( - static_cast *>(solver.get()) + auto precond = dynamic_cast*>( + static_cast*>(solver.get()) ->get_preconditioner() .get()); @@ -219,7 +219,7 @@ TYPED_TEST(Cgs, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/fcg.cpp b/core/test/solver/fcg.cpp index 43052f011e4..4852e92876f 100644 --- a/core/test/solver/fcg.cpp +++ b/core/test/solver/fcg.cpp @@ -90,7 +90,7 @@ TYPED_TEST(Fcg, 
FcgFactoryCreatesCorrectSolver) { using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto fcg_solver = dynamic_cast(this->solver.get()); + auto fcg_solver = dynamic_cast(this->solver.get()); ASSERT_NE(fcg_solver->get_system_matrix(), nullptr); ASSERT_EQ(fcg_solver->get_system_matrix(), this->mtx); } @@ -105,8 +105,8 @@ TYPED_TEST(Fcg, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = dynamic_cast(copy.get())->get_system_matrix(); - GKO_ASSERT_MTX_NEAR(dynamic_cast(copy_mtx.get()), + auto copy_mtx = dynamic_cast(copy.get())->get_system_matrix(); + GKO_ASSERT_MTX_NEAR(dynamic_cast(copy_mtx.get()), this->mtx.get(), 0.0); } @@ -120,8 +120,8 @@ TYPED_TEST(Fcg, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = dynamic_cast(copy.get())->get_system_matrix(); - GKO_ASSERT_MTX_NEAR(dynamic_cast(copy_mtx.get()), + auto copy_mtx = dynamic_cast(copy.get())->get_system_matrix(); + GKO_ASSERT_MTX_NEAR(dynamic_cast(copy_mtx.get()), this->mtx.get(), 0.0); } @@ -133,8 +133,8 @@ TYPED_TEST(Fcg, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = dynamic_cast(clone.get())->get_system_matrix(); - GKO_ASSERT_MTX_NEAR(dynamic_cast(clone_mtx.get()), + auto clone_mtx = dynamic_cast(clone.get())->get_system_matrix(); + GKO_ASSERT_MTX_NEAR(dynamic_cast(clone_mtx.get()), this->mtx.get(), 0.0); } @@ -146,7 +146,7 @@ TYPED_TEST(Fcg, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -177,8 +177,8 @@ TYPED_TEST(Fcg, CanSetPreconditionerGenerator) .on(this->exec)) .on(this->exec); auto solver = fcg_factory->generate(this->mtx); - auto precond = dynamic_cast *>( - 
static_cast *>(solver.get()) + auto precond = dynamic_cast*>( + static_cast*>(solver.get()) ->get_preconditioner() .get()); @@ -204,7 +204,7 @@ TYPED_TEST(Fcg, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/gmres.cpp b/core/test/solver/gmres.cpp index 3f5b9510332..8c17b30b4b3 100644 --- a/core/test/solver/gmres.cpp +++ b/core/test/solver/gmres.cpp @@ -95,7 +95,7 @@ class Gmres : public ::testing::Test { std::unique_ptr gmres_big_factory; std::unique_ptr big_solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -123,7 +123,7 @@ TYPED_TEST(Gmres, GmresFactoryCreatesCorrectSolver) { using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto gmres_solver = static_cast(this->solver.get()); + auto gmres_solver = static_cast(this->solver.get()); ASSERT_NE(gmres_solver->get_system_matrix(), nullptr); ASSERT_EQ(gmres_solver->get_system_matrix(), this->mtx); } @@ -138,8 +138,8 @@ TYPED_TEST(Gmres, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -153,8 +153,8 @@ TYPED_TEST(Gmres, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = 
static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -166,8 +166,8 @@ TYPED_TEST(Gmres, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -179,7 +179,7 @@ TYPED_TEST(Gmres, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -209,8 +209,8 @@ TYPED_TEST(Gmres, CanSetPreconditionerGenerator) .on(this->exec)) .on(this->exec); auto solver = gmres_factory->generate(this->mtx); - auto precond = dynamic_cast *>( - static_cast *>(solver.get()) + auto precond = dynamic_cast*>( + static_cast*>(solver.get()) ->get_preconditioner() .get()); @@ -237,7 +237,7 @@ TYPED_TEST(Gmres, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/idr.cpp b/core/test/solver/idr.cpp index 62dfc1c7e03..0811903f37d 100644 --- a/core/test/solver/idr.cpp +++ b/core/test/solver/idr.cpp @@ -77,7 +77,7 @@ class Idr : public ::testing::Test { std::unique_ptr idr_factory; std::unique_ptr solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -102,7 +102,7 @@ TYPED_TEST(Idr, IdrFactoryCreatesCorrectSolver) { using Solver = typename 
TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto idr_solver = static_cast(this->solver.get()); + auto idr_solver = static_cast(this->solver.get()); ASSERT_NE(idr_solver->get_system_matrix(), nullptr); ASSERT_EQ(idr_solver->get_system_matrix(), this->mtx); } @@ -117,8 +117,8 @@ TYPED_TEST(Idr, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -132,8 +132,8 @@ TYPED_TEST(Idr, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -146,8 +146,8 @@ TYPED_TEST(Idr, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -160,7 +160,7 @@ TYPED_TEST(Idr, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -188,7 +188,7 @@ TYPED_TEST(Idr, CanSetPreconditionerGenerator) .on(this->exec); auto solver = idr_factory->generate(this->mtx); - auto precond = dynamic_cast *>( + auto precond = dynamic_cast*>( 
gko::lend(solver->get_preconditioner())); ASSERT_NE(precond, nullptr); @@ -213,7 +213,7 @@ TYPED_TEST(Idr, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/solver/ir.cpp b/core/test/solver/ir.cpp index 93d18c99260..7e0da4c28b3 100644 --- a/core/test/solver/ir.cpp +++ b/core/test/solver/ir.cpp @@ -79,7 +79,7 @@ class Ir : public ::testing::Test { std::unique_ptr ir_factory; std::unique_ptr solver; - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -104,7 +104,7 @@ TYPED_TEST(Ir, IrFactoryCreatesCorrectSolver) { using Solver = typename TestFixture::Solver; ASSERT_EQ(this->solver->get_size(), gko::dim<2>(3, 3)); - auto cg_solver = static_cast(this->solver.get()); + auto cg_solver = static_cast(this->solver.get()); ASSERT_NE(cg_solver->get_system_matrix(), nullptr); ASSERT_EQ(cg_solver->get_system_matrix(), this->mtx); } @@ -119,8 +119,8 @@ TYPED_TEST(Ir, CanBeCopied) copy->copy_from(this->solver.get()); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); } @@ -134,8 +134,8 @@ TYPED_TEST(Ir, CanBeMoved) copy->copy_from(std::move(this->solver)); ASSERT_EQ(copy->get_size(), gko::dim<2>(3, 3)); - auto copy_mtx = static_cast(copy.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + auto copy_mtx = static_cast(copy.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(copy_mtx.get()), 
this->mtx.get()); } @@ -147,8 +147,8 @@ TYPED_TEST(Ir, CanBeCloned) auto clone = this->solver->clone(); ASSERT_EQ(clone->get_size(), gko::dim<2>(3, 3)); - auto clone_mtx = static_cast(clone.get())->get_system_matrix(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + auto clone_mtx = static_cast(clone.get())->get_system_matrix(); + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); } @@ -160,7 +160,7 @@ TYPED_TEST(Ir, CanBeCleared) ASSERT_EQ(this->solver->get_size(), gko::dim<2>(0, 0)); auto solver_mtx = - static_cast(this->solver.get())->get_system_matrix(); + static_cast(this->solver.get())->get_system_matrix(); ASSERT_EQ(solver_mtx, nullptr); } @@ -190,8 +190,8 @@ TYPED_TEST(Ir, CanSetInnerSolverInFactory) .on(this->exec)) .on(this->exec); auto solver = ir_factory->generate(this->mtx); - auto inner_solver = dynamic_cast( - static_cast(solver.get())->get_solver().get()); + auto inner_solver = dynamic_cast( + static_cast(solver.get())->get_solver().get()); ASSERT_NE(inner_solver, nullptr); ASSERT_EQ(inner_solver->get_size(), gko::dim<2>(3, 3)); @@ -239,7 +239,7 @@ TYPED_TEST(Ir, CanSetCriteriaAgain) solver->set_stop_criterion_factory(new_crit); auto new_crit_fac = solver->get_stop_criterion_factory(); auto niter = - static_cast(new_crit_fac.get()) + static_cast(new_crit_fac.get()) ->get_parameters() .max_iters; diff --git a/core/test/utils/array_generator.hpp b/core/test/utils/array_generator.hpp index 8dbdeacc9a7..762b7ae4ba2 100644 --- a/core/test/utils/array_generator.hpp +++ b/core/test/utils/array_generator.hpp @@ -62,8 +62,8 @@ namespace test { */ template Array generate_random_array(size_type num, - ValueDistribution &&value_dist, - Engine &&engine, + ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec) { Array array(exec->get_master(), num); diff --git a/core/test/utils/array_generator_test.cpp b/core/test/utils/array_generator_test.cpp index ba8c6651be9..e2c92f2785b 100644 --- 
a/core/test/utils/array_generator_test.cpp +++ b/core/test/utils/array_generator_test.cpp @@ -111,13 +111,13 @@ TYPED_TEST(ArrayGenerator, OutputHasCorrectAverageAndDeviation) this->template check_average_and_deviation( this->array.get_const_data(), this->array.get_const_data() + this->array.get_num_elems(), 20.0, 5.0, - [](T &val) { return gko::real(val); }); + [](T& val) { return gko::real(val); }); // check the imag part when the type is complex if (!std::is_same>::value) { this->template check_average_and_deviation( this->array.get_const_data(), this->array.get_const_data() + this->array.get_num_elems(), 20.0, - 5.0, [](T &val) { return gko::imag(val); }); + 5.0, [](T& val) { return gko::imag(val); }); } } diff --git a/core/test/utils/assertions.hpp b/core/test/utils/assertions.hpp index 7583bd40f8c..6f15997d25a 100644 --- a/core/test/utils/assertions.hpp +++ b/core/test/utils/assertions.hpp @@ -134,7 +134,7 @@ class biggest_valuetype< template -auto get_next_value(NonzeroIterator &it, const NonzeroIterator &end, +auto get_next_value(NonzeroIterator& it, const NonzeroIterator& end, size_type next_row, size_type next_col) -> typename std::decayvalue)>::type { @@ -147,7 +147,7 @@ auto get_next_value(NonzeroIterator &it, const NonzeroIterator &end, template -void print_matrix(Ostream &os, const MatrixData &data) +void print_matrix(Ostream& os, const MatrixData& data) { auto it = begin(data.nonzeros); for (size_type row = 0; row < data.size[0]; ++row) { @@ -161,8 +161,8 @@ void print_matrix(Ostream &os, const MatrixData &data) template -void print_componentwise_error(Ostream &os, const MatrixData1 &first, - const MatrixData2 &second) +void print_componentwise_error(Ostream& os, const MatrixData1& first, + const MatrixData2& second) { using std::abs; using vt = typename detail::biggest_valuetype< @@ -191,7 +191,7 @@ void print_componentwise_error(Ostream &os, const MatrixData1 &first, } template -void print_columns(Ostream &os, const Iterator &begin, const Iterator 
&end) +void print_columns(Ostream& os, const Iterator& begin, const Iterator& end) { for (auto it = begin; it != end; ++it) { os << '\t' << it->column; @@ -201,7 +201,7 @@ void print_columns(Ostream &os, const Iterator &begin, const Iterator &end) template -double get_relative_error(const MatrixData1 &first, const MatrixData2 &second) +double get_relative_error(const MatrixData1& first, const MatrixData2& second) { using std::abs; using vt = typename detail::biggest_valuetype< @@ -235,9 +235,9 @@ double get_relative_error(const MatrixData1 &first, const MatrixData2 &second) template ::testing::AssertionResult matrices_near_impl( - const std::string &first_expression, const std::string &second_expression, - const std::string &tolerance_expression, const MatrixData1 &first, - const MatrixData2 &second, double tolerance) + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, const MatrixData1& first, + const MatrixData2& second, double tolerance) { auto num_rows = first.size[0]; auto num_cols = first.size[1]; @@ -302,8 +302,8 @@ ::testing::AssertionResult matrices_near_impl( template ::testing::AssertionResult matrices_equal_sparsity_impl( - const std::string &first_expression, const std::string &second_expression, - const MatrixData1 &first, const MatrixData2 &second) + const std::string& first_expression, const std::string& second_expression, + const MatrixData1& first, const MatrixData2& second) { auto num_rows = first.size[0]; auto num_cols = first.size[1]; @@ -359,8 +359,8 @@ ::testing::AssertionResult matrices_equal_sparsity_impl( template ::testing::AssertionResult array_equal_impl( - const std::string &first_expression, const std::string &second_expression, - const Array &first, const Array &second) + const std::string& first_expression, const std::string& second_expression, + const Array& first, const Array& second) { const auto num_elems1 = first.get_num_elems(); const auto num_elems2 = 
second.get_num_elems(); @@ -411,8 +411,8 @@ ::testing::AssertionResult array_equal_impl( ::testing::AssertionResult str_contains_impl( - const std::string &first_expression, const std::string &second_expression, - const std::string &string1, const std::string &string2) + const std::string& first_expression, const std::string& second_expression, + const std::string& string1, const std::string& string2) { if (string1.find(string2) != std::string::npos) { return ::testing::AssertionSuccess(); @@ -441,14 +441,14 @@ template using remove_container = typename remove_container_impl::type; -std::string remove_pointer_wrapper(const std::string &expression) +std::string remove_pointer_wrapper(const std::string& expression) { constexpr auto prefix_len = sizeof("plain_ptr(") - 1; return expression.substr(prefix_len, expression.size() - prefix_len - 1); } -std::string remove_list_wrapper(const std::string &expression) +std::string remove_list_wrapper(const std::string& expression) { constexpr auto prefix_len = sizeof("l(") - 1; return expression.substr(prefix_len, expression.size() - prefix_len - 1); @@ -477,9 +477,9 @@ std::string remove_list_wrapper(const std::string &expression) * @see GKO_EXPECT_MTX_NEAR */ template -::testing::AssertionResult values_near(const std::string &first_expression, - const std::string &second_expression, - const std::string &tolerance_expression, +::testing::AssertionResult values_near(const std::string& first_expression, + const std::string& second_expression, + const std::string& tolerance_expression, T val1, U val2, double abs_error) { static_assert(std::is_same(), @@ -499,8 +499,8 @@ ::testing::AssertionResult values_near(const std::string &first_expression, template <> ::testing::AssertionResult values_near( - const std::string &first_expression, const std::string &second_expression, - const std::string &tolerance_expression, gko::half val1, gko::half val2, + const std::string& first_expression, const std::string& second_expression, + const 
std::string& tolerance_expression, gko::half val1, gko::half val2, double abs_error) { using T = float32; @@ -519,8 +519,8 @@ ::testing::AssertionResult values_near( template <> ::testing::AssertionResult values_near, std::complex>( - const std::string &first_expression, const std::string &second_expression, - const std::string &tolerance_expression, std::complex val1, + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, std::complex val1, std::complex val2, double abs_error) { using T = std::complex; @@ -563,9 +563,9 @@ ::testing::AssertionResult values_near, std::complex>( */ template ::testing::AssertionResult matrices_near( - const std::string &first_expression, const std::string &second_expression, - const std::string &tolerance_expression, const LinOp1 *first, - const LinOp2 *second, double tolerance) + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, const LinOp1* first, + const LinOp2* second, double tolerance) { auto exec = first->get_executor()->get_master(); matrix_data @@ -588,8 +588,8 @@ ::testing::AssertionResult matrices_near( template ::testing::AssertionResult matrices_near( - const std::string &first_expression, const std::string &second_expression, - const std::string &tolerance_expression, const LinOp1 *first, + const std::string& first_expression, const std::string& second_expression, + const std::string& tolerance_expression, const LinOp1* first, std::initializer_list second, double tolerance) { auto second_mtx = initialize>>( @@ -616,10 +616,10 @@ ::testing::AssertionResult matrices_near( * @see GKO_ASSERT_ARRAY_EQ */ template -::testing::AssertionResult array_equal(const std::string &first_expression, - const std::string &second_expression, - const Array &first, - const Array &second) +::testing::AssertionResult array_equal(const std::string& first_expression, + const std::string& second_expression, + const 
Array& first, + const Array& second) { return detail::array_equal_impl(first_expression, second_expression, first, second); @@ -641,10 +641,10 @@ ::testing::AssertionResult array_equal(const std::string &first_expression, * * @see GKO_ASSERT_STR_CONTAINS */ -::testing::AssertionResult str_contains(const std::string &first_expression, - const std::string &second_expression, - const std::string &string1, - const std::string &string2) +::testing::AssertionResult str_contains(const std::string& first_expression, + const std::string& second_expression, + const std::string& string1, + const std::string& string2) { return detail::str_contains_impl(first_expression, second_expression, string1, string2); @@ -675,8 +675,8 @@ ::testing::AssertionResult str_contains(const std::string &first_expression, */ template ::testing::AssertionResult matrices_equal_sparsity( - const std::string &first_expression, const std::string &second_expression, - const LinOp1 *first, const LinOp2 *second) + const std::string& first_expression, const std::string& second_expression, + const LinOp1* first, const LinOp2* second) { auto exec = first->get_executor()->get_master(); matrix_data @@ -701,51 +701,51 @@ namespace detail { template -const std::initializer_list> &l( - const std::initializer_list> &list) +const std::initializer_list>& l( + const std::initializer_list>& list) { return list; } template -const std::initializer_list &l(const std::initializer_list &list) +const std::initializer_list& l(const std::initializer_list& list) { return list; } template -T &&l(T &&matrix) +T&& l(T&& matrix) { return std::forward(matrix); } template -T *plain_ptr(const std::shared_ptr &ptr) +T* plain_ptr(const std::shared_ptr& ptr) { return ptr.get(); } template -T *plain_ptr(const std::unique_ptr &ptr) +T* plain_ptr(const std::unique_ptr& ptr) { return ptr.get(); } template -const std::initializer_list &plain_ptr(const std::initializer_list &ptr) +const std::initializer_list& plain_ptr(const 
std::initializer_list& ptr) { return ptr; } template -const std::initializer_list> &plain_ptr( - const std::initializer_list> &ptr) +const std::initializer_list>& plain_ptr( + const std::initializer_list>& ptr) { return ptr; } template -T *plain_ptr(T *ptr) +T* plain_ptr(T* ptr) { return ptr; } diff --git a/core/test/utils/assertions_test.cpp b/core/test/utils/assertions_test.cpp index c5ebccb10ec..4c90626b459 100644 --- a/core/test/utils/assertions_test.cpp +++ b/core/test/utils/assertions_test.cpp @@ -52,7 +52,7 @@ class MatricesNear : public ::testing::Test { using Sparse = gko::matrix::Csr<>; template - gko::Array make_view(std::array &array) + gko::Array make_view(std::array& array) { return gko::Array::view(exec, size, array.data()); } diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 9a101f4b043..b806973f0d9 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -76,9 +76,9 @@ namespace test { template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> std::unique_ptr generate_random_matrix( - size_type num_rows, size_type num_cols, NonzeroDistribution &&nonzero_dist, - ValueDistribution &&value_dist, Engine &&engine, - std::shared_ptr exec, MatrixArgs &&... args) + size_type num_rows, size_type num_cols, NonzeroDistribution&& nonzero_dist, + ValueDistribution&& value_dist, Engine&& engine, + std::shared_ptr exec, MatrixArgs&&... args) { using value_type = typename MatrixType::value_type; using index_type = typename MatrixType::index_type; @@ -136,9 +136,9 @@ std::unique_ptr generate_random_matrix( template , typename NonzeroDistribution, typename Engine, typename... MatrixArgs> std::unique_ptr generate_random_sparsity_matrix( - size_type num_rows, size_type num_cols, NonzeroDistribution &&nonzero_dist, - typename MatrixType::value_type value, Engine &&engine, - std::shared_ptr exec, MatrixArgs &&... 
args) + size_type num_rows, size_type num_cols, NonzeroDistribution&& nonzero_dist, + typename MatrixType::value_type value, Engine&& engine, + std::shared_ptr exec, MatrixArgs&&... args) { using value_type = typename MatrixType::value_type; using index_type = typename MatrixType::index_type; @@ -200,9 +200,9 @@ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> std::unique_ptr generate_random_triangular_matrix( size_type num_rows, size_type num_cols, bool ones_on_diagonal, - bool lower_triangular, NonzeroDistribution &&nonzero_dist, - ValueDistribution &&value_dist, Engine &&engine, - std::shared_ptr exec, MatrixArgs &&... args) + bool lower_triangular, NonzeroDistribution&& nonzero_dist, + ValueDistribution&& value_dist, Engine&& engine, + std::shared_ptr exec, MatrixArgs&&... args) { using value_type = typename MatrixType::value_type; using index_type = typename MatrixType::index_type; @@ -287,9 +287,8 @@ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> std::unique_ptr generate_random_lower_triangular_matrix( size_type num_rows, size_type num_cols, bool ones_on_diagonal, - NonzeroDistribution &&nonzero_dist, ValueDistribution &&value_dist, - Engine &&engine, std::shared_ptr exec, - MatrixArgs &&... args) + NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec, MatrixArgs&&... args) { return generate_random_triangular_matrix( num_rows, num_cols, ones_on_diagonal, true, nonzero_dist, value_dist, @@ -323,9 +322,8 @@ template , typename NonzeroDistribution, typename ValueDistribution, typename Engine, typename... MatrixArgs> std::unique_ptr generate_random_upper_triangular_matrix( size_type num_rows, size_type num_cols, bool ones_on_diagonal, - NonzeroDistribution &&nonzero_dist, ValueDistribution &&value_dist, - Engine &&engine, std::shared_ptr exec, - MatrixArgs &&... 
args) + NonzeroDistribution&& nonzero_dist, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec, MatrixArgs&&... args) { return generate_random_triangular_matrix( num_rows, num_cols, ones_on_diagonal, false, nonzero_dist, value_dist, @@ -358,8 +356,8 @@ template , typename ValueDistribution, typename Engine, typename... MatrixArgs> std::unique_ptr generate_random_band_matrix( size_type size, size_type lower_bandwidth, size_type upper_bandwidth, - ValueDistribution &&value_dist, Engine &&engine, - std::shared_ptr exec, MatrixArgs &&... args) + ValueDistribution&& value_dist, Engine&& engine, + std::shared_ptr exec, MatrixArgs&&... args) { using value_type = typename MatrixType::value_type; using index_type = typename MatrixType::index_type; diff --git a/core/test/utils/matrix_generator_test.cpp b/core/test/utils/matrix_generator_test.cpp index 687560a202b..4bdbe02a46d 100644 --- a/core/test/utils/matrix_generator_test.cpp +++ b/core/test/utils/matrix_generator_test.cpp @@ -170,12 +170,12 @@ TYPED_TEST(MatrixGenerator, OutputHasCorrectValuesAverageAndDeviation) // check the real part this->template check_average_and_deviation( begin(this->values_sample), end(this->values_sample), 20.0, 5.0, - [](T &val) { return gko::real(val); }); + [](T& val) { return gko::real(val); }); // check the imag part when the type is complex if (!std::is_same>::value) { this->template check_average_and_deviation( begin(this->values_sample), end(this->values_sample), 20.0, 5.0, - [](T &val) { return gko::imag(val); }); + [](T& val) { return gko::imag(val); }); } } @@ -231,12 +231,12 @@ TYPED_TEST(MatrixGenerator, CanGenerateBandMatrix) // check the real part of elements in band this->template check_average_and_deviation( begin(this->band_values_sample), end(this->band_values_sample), 20.0, - 5.0, [](T &val) { return gko::real(val); }); + 5.0, [](T& val) { return gko::real(val); }); // check the imag part when the type is complex if (!std::is_same>::value) { 
this->template check_average_and_deviation( begin(this->band_values_sample), end(this->band_values_sample), - 20.0, 5.0, [](T &val) { return gko::imag(val); }); + 20.0, 5.0, [](T& val) { return gko::imag(val); }); } } diff --git a/core/test/utils/matrix_utils.hpp b/core/test/utils/matrix_utils.hpp index dc6586f07b7..5be47177dbd 100644 --- a/core/test/utils/matrix_utils.hpp +++ b/core/test/utils/matrix_utils.hpp @@ -54,7 +54,7 @@ namespace test { * @param mtx the dense matrix */ template -void make_symmetric(matrix::Dense *mtx) +void make_symmetric(matrix::Dense* mtx) { GKO_ASSERT_IS_SQUARE_MATRIX(mtx); auto mtx_host = @@ -76,7 +76,7 @@ void make_symmetric(matrix::Dense *mtx) * @param mtx the dense matrix */ template -void make_hermitian(matrix::Dense *mtx) +void make_hermitian(matrix::Dense* mtx) { GKO_ASSERT_IS_SQUARE_MATRIX(mtx); auto mtx_host = @@ -105,7 +105,7 @@ void make_hermitian(matrix::Dense *mtx) * be larger than or equal to 1. */ template -void make_diag_dominant(matrix::Dense *mtx, +void make_diag_dominant(matrix::Dense* mtx, remove_complex ratio = 1.0) { // To keep the diag dominant, the ratio should be larger than or equal to 1 @@ -134,7 +134,7 @@ void make_diag_dominant(matrix::Dense *mtx, * be larger than 1. */ template -void make_hpd(matrix::Dense *mtx, +void make_hpd(matrix::Dense* mtx, remove_complex ratio = 1.001) { GKO_ASSERT_IS_SQUARE_MATRIX(mtx); diff --git a/core/test/utils/unsort_matrix.hpp b/core/test/utils/unsort_matrix.hpp index b8064dfbade..809b8318985 100644 --- a/core/test/utils/unsort_matrix.hpp +++ b/core/test/utils/unsort_matrix.hpp @@ -53,8 +53,8 @@ namespace test { // Plan for now: shuffle values and column indices to unsort the given matrix // without changing the represented matrix. 
template -void unsort_matrix(matrix::Csr *mtx, - RandomEngine &&engine) +void unsort_matrix(matrix::Csr* mtx, + RandomEngine&& engine) { using value_type = ValueType; using index_type = IndexType; @@ -62,8 +62,8 @@ void unsort_matrix(matrix::Csr *mtx, if (mtx->get_num_stored_elements() <= 0) { return; } - const auto &exec = mtx->get_executor(); - const auto &master = exec->get_master(); + const auto& exec = mtx->get_executor(); + const auto& master = exec->get_master(); // If exec is not the master/host, extract the master and perform the // unsorting there, followed by copying it back @@ -91,8 +91,8 @@ void unsort_matrix(matrix::Csr *mtx, // Plan for now: shuffle values and column indices to unsort the given matrix // without changing the represented matrix. template -void unsort_matrix(matrix::Coo *mtx, - RandomEngine &&engine) +void unsort_matrix(matrix::Coo* mtx, + RandomEngine&& engine) { using value_type = ValueType; using index_type = IndexType; @@ -101,8 +101,8 @@ void unsort_matrix(matrix::Coo *mtx, return; } - const auto &exec = mtx->get_executor(); - const auto &master = exec->get_master(); + const auto& exec = mtx->get_executor(); + const auto& master = exec->get_master(); // If exec is not the master/host, extract the master and perform the // unsorting there, followed by copying it back @@ -114,7 +114,7 @@ void unsort_matrix(matrix::Coo *mtx, } matrix_data data; mtx->write(data); - auto &nonzeros = data.nonzeros; + auto& nonzeros = data.nonzeros; using nz_type = typename decltype(data)::nonzero_type; std::shuffle(nonzeros.begin(), nonzeros.end(), engine); diff --git a/core/test/utils/unsort_matrix_test.cpp b/core/test/utils/unsort_matrix_test.cpp index 90fbbc2ccf2..54983761bba 100644 --- a/core/test/utils/unsort_matrix_test.cpp +++ b/core/test/utils/unsort_matrix_test.cpp @@ -95,7 +95,7 @@ class UnsortMatrix : public ::testing::Test { I{0, 0, 2, 2, 2, 2, 3, 4, 4, 4}); } - bool is_coo_matrix_sorted(Coo *mtx) + bool is_coo_matrix_sorted(Coo* mtx) { auto 
rows = mtx->get_const_row_idxs(); auto cols = mtx->get_const_col_idxs(); @@ -119,7 +119,7 @@ class UnsortMatrix : public ::testing::Test { return true; } - bool is_csr_matrix_sorted(Csr *mtx) + bool is_csr_matrix_sorted(Csr* mtx) { auto size = mtx->get_size(); auto rows = mtx->get_const_row_ptrs(); diff --git a/core/test/utils/value_generator.hpp b/core/test/utils/value_generator.hpp index 8791bf6ce01..5c98a6afe8a 100644 --- a/core/test/utils/value_generator.hpp +++ b/core/test/utils/value_generator.hpp @@ -60,7 +60,7 @@ namespace detail { */ template typename std::enable_if::value, ValueType>::type -get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +get_rand_value(ValueDistribution&& value_dist, Engine&& gen) { return value_dist(gen); } @@ -72,7 +72,7 @@ get_rand_value(ValueDistribution &&value_dist, Engine &&gen) */ template typename std::enable_if::value, ValueType>::type -get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +get_rand_value(ValueDistribution&& value_dist, Engine&& gen) { return ValueType(value_dist(gen), value_dist(gen)); } diff --git a/core/test/utils/value_generator_test.cpp b/core/test/utils/value_generator_test.cpp index 58f033404a9..b30f9a2e793 100644 --- a/core/test/utils/value_generator_test.cpp +++ b/core/test/utils/value_generator_test.cpp @@ -105,12 +105,12 @@ TYPED_TEST(ValueGenerator, OutputHasCorrectAverageAndDeviation) // check the real part this->template check_average_and_deviation( begin(values), end(values), 20.0, 5.0, - [](T &val) { return gko::real(val); }); + [](T& val) { return gko::real(val); }); // check the imag part when the type is complex if (!std::is_same>::value) { this->template check_average_and_deviation( begin(values), end(values), 20.0, 5.0, - [](T &val) { return gko::imag(val); }); + [](T& val) { return gko::imag(val); }); } } diff --git a/cuda/base/cublas_bindings.hpp b/cuda/base/cublas_bindings.hpp index 6dfb383ca39..147715dc968 100644 --- a/cuda/base/cublas_bindings.hpp +++ 
b/cuda/base/cublas_bindings.hpp @@ -72,7 +72,7 @@ namespace detail { template -inline int64 not_implemented(Args &&...) +inline int64 not_implemented(Args&&...) { return static_cast(CUBLAS_STATUS_NOT_SUPPORTED); } @@ -100,9 +100,9 @@ struct is_supported> : std::true_type {}; #define GKO_BIND_CUBLAS_GEMM(ValueType, CublasName) \ inline void gemm(cublasHandle_t handle, cublasOperation_t transa, \ cublasOperation_t transb, int m, int n, int k, \ - const ValueType *alpha, const ValueType *a, int lda, \ - const ValueType *b, int ldb, const ValueType *beta, \ - ValueType *c, int ldc) \ + const ValueType* alpha, const ValueType* a, int lda, \ + const ValueType* b, int ldb, const ValueType* beta, \ + ValueType* c, int ldc) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS( \ CublasName(handle, transa, transb, m, n, k, as_culibs_type(alpha), \ @@ -126,9 +126,9 @@ GKO_BIND_CUBLAS_GEMM(ValueType, detail::not_implemented); #define GKO_BIND_CUBLAS_GEAM(ValueType, CublasName) \ inline void geam(cublasHandle_t handle, cublasOperation_t transa, \ cublasOperation_t transb, int m, int n, \ - const ValueType *alpha, const ValueType *a, int lda, \ - const ValueType *beta, const ValueType *b, int ldb, \ - ValueType *c, int ldc) \ + const ValueType* alpha, const ValueType* a, int lda, \ + const ValueType* beta, const ValueType* b, int ldb, \ + ValueType* c, int ldc) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS( \ CublasName(handle, transa, transb, m, n, as_culibs_type(alpha), \ @@ -150,8 +150,8 @@ GKO_BIND_CUBLAS_GEAM(ValueType, detail::not_implemented); #define GKO_BIND_CUBLAS_SCAL(ValueType, CublasName) \ - inline void scal(cublasHandle_t handle, int n, const ValueType *alpha, \ - ValueType *x, int incx) \ + inline void scal(cublasHandle_t handle, int n, const ValueType* alpha, \ + ValueType* x, int incx) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS(CublasName( \ handle, n, as_culibs_type(alpha), as_culibs_type(x), incx)); \ @@ -171,8 +171,8 @@ GKO_BIND_CUBLAS_SCAL(ValueType, detail::not_implemented); #define 
GKO_BIND_CUBLAS_AXPY(ValueType, CublasName) \ - inline void axpy(cublasHandle_t handle, int n, const ValueType *alpha, \ - const ValueType *x, int incx, ValueType *y, int incy) \ + inline void axpy(cublasHandle_t handle, int n, const ValueType* alpha, \ + const ValueType* x, int incx, ValueType* y, int incy) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS( \ CublasName(handle, n, as_culibs_type(alpha), as_culibs_type(x), \ @@ -193,8 +193,8 @@ GKO_BIND_CUBLAS_AXPY(ValueType, detail::not_implemented); #define GKO_BIND_CUBLAS_DOT(ValueType, CublasName) \ - inline void dot(cublasHandle_t handle, int n, const ValueType *x, \ - int incx, const ValueType *y, int incy, ValueType *result) \ + inline void dot(cublasHandle_t handle, int n, const ValueType* x, \ + int incx, const ValueType* y, int incy, ValueType* result) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS(CublasName(handle, n, as_culibs_type(x), \ incx, as_culibs_type(y), incy, \ @@ -215,9 +215,9 @@ GKO_BIND_CUBLAS_DOT(ValueType, detail::not_implemented); #define GKO_BIND_CUBLAS_CONJ_DOT(ValueType, CublasName) \ - inline void conj_dot(cublasHandle_t handle, int n, const ValueType *x, \ - int incx, const ValueType *y, int incy, \ - ValueType *result) \ + inline void conj_dot(cublasHandle_t handle, int n, const ValueType* x, \ + int incx, const ValueType* y, int incy, \ + ValueType* result) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS(CublasName(handle, n, as_culibs_type(x), \ incx, as_culibs_type(y), incy, \ @@ -238,8 +238,8 @@ GKO_BIND_CUBLAS_CONJ_DOT(ValueType, detail::not_implemented); #define GKO_BIND_CUBLAS_NORM2(ValueType, CublasName) \ - inline void norm2(cublasHandle_t handle, int n, const ValueType *x, \ - int incx, remove_complex *result) \ + inline void norm2(cublasHandle_t handle, int n, const ValueType* x, \ + int incx, remove_complex* result) \ { \ GKO_ASSERT_NO_CUBLAS_ERRORS(CublasName(handle, n, as_culibs_type(x), \ incx, as_culibs_type(result))); \ diff --git a/cuda/base/curand_bindings.hpp b/cuda/base/curand_bindings.hpp index 
9df87b230ef..89fd0d83462 100644 --- a/cuda/base/curand_bindings.hpp +++ b/cuda/base/curand_bindings.hpp @@ -83,12 +83,12 @@ inline curandGenerator_t rand_generator(int64 seed, #define GKO_BIND_CURAND_RANDOM_VECTOR(ValueType, CurandName) \ inline void rand_vector( \ - curandGenerator_t &gen, int n, remove_complex mean, \ - remove_complex stddev, ValueType *values) \ + curandGenerator_t& gen, int n, remove_complex mean, \ + remove_complex stddev, ValueType* values) \ { \ n = is_complex() ? 2 * n : n; \ GKO_ASSERT_NO_CURAND_ERRORS(CurandName( \ - gen, reinterpret_cast *>(values), n, \ + gen, reinterpret_cast*>(values), n, \ mean, stddev)); \ } \ static_assert(true, \ diff --git a/cuda/base/cusparse_bindings.hpp b/cuda/base/cusparse_bindings.hpp index 8a3de85293b..3d871de39fe 100644 --- a/cuda/base/cusparse_bindings.hpp +++ b/cuda/base/cusparse_bindings.hpp @@ -92,11 +92,11 @@ struct is_supported, int32> : std::true_type {}; #define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName) \ inline void spmv(cusparseHandle_t handle, cusparseOperation_t transA, \ - int32 m, int32 n, int32 nnz, const ValueType *alpha, \ + int32 m, int32 n, int32 nnz, const ValueType* alpha, \ const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *x, \ - const ValueType *beta, ValueType *y) \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* x, \ + const ValueType* beta, ValueType* y) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ handle, transA, m, n, nnz, as_culibs_type(alpha), descrA, \ @@ -109,11 +109,11 @@ struct is_supported, int32> : std::true_type {}; #define GKO_BIND_CUSPARSE64_SPMV(ValueType, CusparseName) \ inline void spmv(cusparseHandle_t handle, cusparseOperation_t transA, \ - int64 m, int64 n, int64 nnz, const ValueType *alpha, \ + int64 m, int64 n, int64 nnz, const ValueType* alpha, \ const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, 
const int64 *csrRowPtrA, \ - const int64 *csrColIndA, const ValueType *x, \ - const ValueType *beta, ValueType *y) GKO_NOT_IMPLEMENTED; \ + const ValueType* csrValA, const int64* csrRowPtrA, \ + const int64* csrColIndA, const ValueType* x, \ + const ValueType* beta, ValueType* y) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -141,12 +141,12 @@ GKO_BIND_CUSPARSE64_SPMV(ValueType, detail::not_implemented); template inline void spmv_buffersize(cusparseHandle_t handle, cusparseOperation_t opA, - const ValueType *alpha, + const ValueType* alpha, const cusparseSpMatDescr_t matA, const cusparseDnVecDescr_t vecX, - const ValueType *beta, + const ValueType* beta, const cusparseDnVecDescr_t vecY, - cusparseSpMVAlg_t alg, size_type *bufferSize) + cusparseSpMVAlg_t alg, size_type* bufferSize) { constexpr auto value_type = cuda_data_type(); cusparseSpMV_bufferSize(handle, opA, alpha, matA, vecX, beta, vecY, @@ -155,10 +155,10 @@ inline void spmv_buffersize(cusparseHandle_t handle, cusparseOperation_t opA, template inline void spmv(cusparseHandle_t handle, cusparseOperation_t opA, - const ValueType *alpha, const cusparseSpMatDescr_t matA, - const cusparseDnVecDescr_t vecX, const ValueType *beta, + const ValueType* alpha, const cusparseSpMatDescr_t matA, + const cusparseDnVecDescr_t vecX, const ValueType* beta, const cusparseDnVecDescr_t vecY, cusparseSpMVAlg_t alg, - void *externalBuffer) + void* externalBuffer) { constexpr auto value_type = cuda_data_type(); cusparseSpMV(handle, opA, alpha, matA, vecX, beta, vecY, value_type, alg, @@ -173,11 +173,11 @@ inline void spmv(cusparseHandle_t handle, cusparseOperation_t opA, #define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName) \ inline void spmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, \ - int32 m, int32 n, int32 nnz, const ValueType *alpha, \ + int32 m, int32 n, int32 nnz, const ValueType* alpha, \ const cusparseMatDescr_t 
descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *x, \ - const ValueType *beta, ValueType *y) \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* x, \ + const ValueType* beta, ValueType* y) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ handle, transA, m, n, nnz, as_culibs_type(alpha), descrA, \ @@ -191,10 +191,10 @@ inline void spmv(cusparseHandle_t handle, cusparseOperation_t opA, #define GKO_BIND_CUSPARSE64_SPMV(ValueType, CusparseName) \ inline void spmv_mp( \ cusparseHandle_t handle, cusparseOperation_t transA, int64 m, int64 n, \ - int64 nnz, const ValueType *alpha, const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int64 *csrRowPtrA, \ - const int64 *csrColIndA, const ValueType *x, const ValueType *beta, \ - ValueType *y) GKO_NOT_IMPLEMENTED; \ + int64 nnz, const ValueType* alpha, const cusparseMatDescr_t descrA, \ + const ValueType* csrValA, const int64* csrRowPtrA, \ + const int64* csrColIndA, const ValueType* x, const ValueType* beta, \ + ValueType* y) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -220,10 +220,10 @@ GKO_BIND_CUSPARSE64_SPMV(ValueType, detail::not_implemented); #define GKO_BIND_CUSPARSE32_SPMM(ValueType, CusparseName) \ inline void spmm(cusparseHandle_t handle, cusparseOperation_t transA, \ int32 m, int32 n, int32 k, int32 nnz, \ - const ValueType *alpha, const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *B, int32 ldb, \ - const ValueType *beta, ValueType *C, int32 ldc) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* B, int32 ldb, \ + const ValueType* beta, ValueType* C, int32 ldc) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ 
CusparseName(handle, transA, m, n, k, nnz, as_culibs_type(alpha), \ @@ -238,10 +238,10 @@ GKO_BIND_CUSPARSE64_SPMV(ValueType, detail::not_implemented); #define GKO_BIND_CUSPARSE64_SPMM(ValueType, CusparseName) \ inline void spmm(cusparseHandle_t handle, cusparseOperation_t transA, \ int64 m, int64 n, int64 k, int64 nnz, \ - const ValueType *alpha, const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int64 *csrRowPtrA, \ - const int64 *csrColIndA, const ValueType *B, int64 ldb, \ - const ValueType *beta, ValueType *C, int64 ldc) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, \ + const ValueType* csrValA, const int64* csrRowPtrA, \ + const int64* csrColIndA, const ValueType* B, int64 ldb, \ + const ValueType* beta, ValueType* C, int64 ldc) \ GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -271,21 +271,21 @@ GKO_BIND_CUSPARSE64_SPMM(ValueType, detail::not_implemented); template inline void spmv(cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, IndexType m, IndexType n, - IndexType nnz, const ValueType *alpha, - const cusparseMatDescr_t descrA, const ValueType *csrValA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, - const ValueType *x, const ValueType *beta, ValueType *y, - void *buffer) GKO_NOT_IMPLEMENTED; + IndexType nnz, const ValueType* alpha, + const cusparseMatDescr_t descrA, const ValueType* csrValA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, + const ValueType* x, const ValueType* beta, ValueType* y, + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_SPMV(ValueType) \ template <> \ inline void spmv( \ cusparseHandle_t handle, cusparseAlgMode_t alg, \ cusparseOperation_t transA, int32 m, int32 n, int32 nnz, \ - const ValueType *alpha, const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *x, const ValueType *beta, \ - 
ValueType *y, void *buffer) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* x, const ValueType* beta, \ + ValueType* y, void* buffer) \ { \ auto data_type = gko::kernels::cuda::cuda_data_type(); \ if (data_type == CUDA_C_8U) { \ @@ -312,23 +312,23 @@ GKO_BIND_CUSPARSE_SPMV(std::complex); template inline void spmv_buffersize(cusparseHandle_t handle, cusparseAlgMode_t alg, cusparseOperation_t transA, IndexType m, - IndexType n, IndexType nnz, const ValueType *alpha, + IndexType n, IndexType nnz, const ValueType* alpha, const cusparseMatDescr_t descrA, - const ValueType *csrValA, - const IndexType *csrRowPtrA, - const IndexType *csrColIndA, const ValueType *x, - const ValueType *beta, ValueType *y, - size_type *bufferSizeInBytes) GKO_NOT_IMPLEMENTED; + const ValueType* csrValA, + const IndexType* csrRowPtrA, + const IndexType* csrColIndA, const ValueType* x, + const ValueType* beta, ValueType* y, + size_type* bufferSizeInBytes) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_SPMV_BUFFERSIZE(ValueType) \ template <> \ inline void spmv_buffersize( \ cusparseHandle_t handle, cusparseAlgMode_t alg, \ cusparseOperation_t transA, int32 m, int32 n, int32 nnz, \ - const ValueType *alpha, const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *x, const ValueType *beta, \ - ValueType *y, size_type *bufferSizeInBytes) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* x, const ValueType* beta, \ + ValueType* y, size_type* bufferSizeInBytes) \ { \ auto data_type = gko::kernels::cuda::cuda_data_type(); \ if (data_type == CUDA_C_8U) { \ @@ -357,9 +357,9 @@ GKO_BIND_CUSPARSE_SPMV_BUFFERSIZE(std::complex); #define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName) \ inline void 
spmv(cusparseHandle_t handle, cusparseOperation_t transA, \ - const ValueType *alpha, const cusparseMatDescr_t descrA, \ - const cusparseHybMat_t hybA, const ValueType *x, \ - const ValueType *beta, ValueType *y) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, \ + const cusparseHybMat_t hybA, const ValueType* x, \ + const ValueType* beta, ValueType* y) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ handle, transA, as_culibs_type(alpha), descrA, hybA, \ @@ -389,24 +389,24 @@ GKO_BIND_CUSPARSE32_SPMV(ValueType, detail::not_implemented); template void spgemm_buffer_size( cusparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const ValueType *alpha, const cusparseMatDescr_t descrA, IndexType nnzA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, + const ValueType* alpha, const cusparseMatDescr_t descrA, IndexType nnzA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, const cusparseMatDescr_t descrB, IndexType nnzB, - const IndexType *csrRowPtrB, const IndexType *csrColIndB, - const ValueType *beta, const cusparseMatDescr_t descrD, IndexType nnzD, - const IndexType *csrRowPtrD, const IndexType *csrColIndD, - csrgemm2Info_t info, size_type &result) GKO_NOT_IMPLEMENTED; + const IndexType* csrRowPtrB, const IndexType* csrColIndB, + const ValueType* beta, const cusparseMatDescr_t descrD, IndexType nnzD, + const IndexType* csrRowPtrD, const IndexType* csrColIndD, + csrgemm2Info_t info, size_type& result) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(ValueType, CusparseName) \ template <> \ inline void spgemm_buffer_size( \ cusparseHandle_t handle, int32 m, int32 n, int32 k, \ - const ValueType *alpha, const cusparseMatDescr_t descrA, int32 nnzA, \ - const int32 *csrRowPtrA, const int32 *csrColIndA, \ - const cusparseMatDescr_t descrB, int32 nnzB, const int32 *csrRowPtrB, \ - const int32 *csrColIndB, const ValueType *beta, \ - const cusparseMatDescr_t descrD, int32 nnzD, const int32 *csrRowPtrD, \ - const 
int32 *csrColIndD, csrgemm2Info_t info, size_type &result) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, int32 nnzA, \ + const int32* csrRowPtrA, const int32* csrColIndA, \ + const cusparseMatDescr_t descrB, int32 nnzB, const int32* csrRowPtrB, \ + const int32* csrColIndB, const ValueType* beta, \ + const cusparseMatDescr_t descrD, int32 nnzD, const int32* csrRowPtrD, \ + const int32* csrColIndD, csrgemm2Info_t info, size_type& result) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, n, k, as_culibs_type(alpha), descrA, nnzA, \ @@ -432,24 +432,24 @@ GKO_BIND_CUSPARSE_SPGEMM_BUFFER_SIZE(std::complex, template void spgemm_nnz(cusparseHandle_t handle, IndexType m, IndexType n, IndexType k, const cusparseMatDescr_t descrA, IndexType nnzA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, const cusparseMatDescr_t descrB, IndexType nnzB, - const IndexType *csrRowPtrB, const IndexType *csrColIndB, + const IndexType* csrRowPtrB, const IndexType* csrColIndB, const cusparseMatDescr_t descrD, IndexType nnzD, - const IndexType *csrRowPtrD, const IndexType *csrColIndD, - const cusparseMatDescr_t descrC, IndexType *csrRowPtrC, - IndexType *nnzC, csrgemm2Info_t info, - void *buffer) GKO_NOT_IMPLEMENTED; + const IndexType* csrRowPtrD, const IndexType* csrColIndD, + const cusparseMatDescr_t descrC, IndexType* csrRowPtrC, + IndexType* nnzC, csrgemm2Info_t info, + void* buffer) GKO_NOT_IMPLEMENTED; template <> inline void spgemm_nnz( cusparseHandle_t handle, int32 m, int32 n, int32 k, - const cusparseMatDescr_t descrA, int32 nnzA, const int32 *csrRowPtrA, - const int32 *csrColIndA, const cusparseMatDescr_t descrB, int32 nnzB, - const int32 *csrRowPtrB, const int32 *csrColIndB, - const cusparseMatDescr_t descrD, int32 nnzD, const int32 *csrRowPtrD, - const int32 *csrColIndD, const cusparseMatDescr_t descrC, int32 *csrRowPtrC, - int32 *nnzC, csrgemm2Info_t info, void *buffer) + const 
cusparseMatDescr_t descrA, int32 nnzA, const int32* csrRowPtrA, + const int32* csrColIndA, const cusparseMatDescr_t descrB, int32 nnzB, + const int32* csrRowPtrB, const int32* csrColIndB, + const cusparseMatDescr_t descrD, int32 nnzD, const int32* csrRowPtrD, + const int32* csrColIndD, const cusparseMatDescr_t descrC, int32* csrRowPtrC, + int32* nnzC, csrgemm2Info_t info, void* buffer) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseXcsrgemm2Nnz( handle, m, n, k, descrA, nnzA, csrRowPtrA, csrColIndA, descrB, nnzB, @@ -460,33 +460,33 @@ inline void spgemm_nnz( template void spgemm(cusparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const ValueType *alpha, const cusparseMatDescr_t descrA, - IndexType nnzA, const ValueType *csrValA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, + const ValueType* alpha, const cusparseMatDescr_t descrA, + IndexType nnzA, const ValueType* csrValA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, const cusparseMatDescr_t descrB, IndexType nnzB, - const ValueType *csrValB, const IndexType *csrRowPtrB, - const IndexType *csrColIndB, const ValueType *beta, + const ValueType* csrValB, const IndexType* csrRowPtrB, + const IndexType* csrColIndB, const ValueType* beta, const cusparseMatDescr_t descrD, IndexType nnzD, - const ValueType *csrValD, const IndexType *csrRowPtrD, - const IndexType *csrColIndD, const cusparseMatDescr_t descrC, - ValueType *csrValC, const IndexType *csrRowPtrC, - IndexType *csrColIndC, csrgemm2Info_t info, - void *buffer) GKO_NOT_IMPLEMENTED; + const ValueType* csrValD, const IndexType* csrRowPtrD, + const IndexType* csrColIndD, const cusparseMatDescr_t descrC, + ValueType* csrValC, const IndexType* csrRowPtrC, + IndexType* csrColIndC, csrgemm2Info_t info, + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_SPGEMM(ValueType, CusparseName) \ template <> \ inline void spgemm( \ cusparseHandle_t handle, int32 m, int32 n, int32 k, \ - const ValueType *alpha, const cusparseMatDescr_t 
descrA, int32 nnzA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const cusparseMatDescr_t descrB, int32 nnzB, \ - const ValueType *csrValB, const int32 *csrRowPtrB, \ - const int32 *csrColIndB, const ValueType *beta, \ - const cusparseMatDescr_t descrD, int32 nnzD, const ValueType *csrValD, \ - const int32 *csrRowPtrD, const int32 *csrColIndD, \ - const cusparseMatDescr_t descrC, ValueType *csrValC, \ - const int32 *csrRowPtrC, int32 *csrColIndC, csrgemm2Info_t info, \ - void *buffer) \ + const ValueType* alpha, const cusparseMatDescr_t descrA, int32 nnzA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const cusparseMatDescr_t descrB, int32 nnzB, \ + const ValueType* csrValB, const int32* csrRowPtrB, \ + const int32* csrColIndB, const ValueType* beta, \ + const cusparseMatDescr_t descrD, int32 nnzD, const ValueType* csrValD, \ + const int32* csrRowPtrD, const int32* csrColIndD, \ + const cusparseMatDescr_t descrC, ValueType* csrValC, \ + const int32* csrRowPtrC, int32* csrColIndC, csrgemm2Info_t info, \ + void* buffer) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS(CusparseName( \ handle, m, n, k, as_culibs_type(alpha), descrA, nnzA, \ @@ -513,12 +513,12 @@ GKO_BIND_CUSPARSE_SPGEMM(std::complex, cusparseZcsrgemm2); template -void spgemm_work_estimation(cusparseHandle_t handle, const ValueType *alpha, +void spgemm_work_estimation(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t a_descr, - cusparseSpMatDescr_t b_descr, const ValueType *beta, + cusparseSpMatDescr_t b_descr, const ValueType* beta, cusparseSpMatDescr_t c_descr, cusparseSpGEMMDescr_t spgemm_descr, - size_type &buffer1_size, void *buffer1) + size_type& buffer1_size, void* buffer1) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_workEstimation( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -529,11 +529,11 @@ void spgemm_work_estimation(cusparseHandle_t handle, const ValueType *alpha, template -void 
spgemm_compute(cusparseHandle_t handle, const ValueType *alpha, +void spgemm_compute(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t a_descr, cusparseSpMatDescr_t b_descr, - const ValueType *beta, cusparseSpMatDescr_t c_descr, - cusparseSpGEMMDescr_t spgemm_descr, void *buffer1, - size_type &buffer2_size, void *buffer2) + const ValueType* beta, cusparseSpMatDescr_t c_descr, + cusparseSpGEMMDescr_t spgemm_descr, void* buffer1, + size_type& buffer2_size, void* buffer2) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpGEMM_compute( handle, CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -544,9 +544,9 @@ void spgemm_compute(cusparseHandle_t handle, const ValueType *alpha, template -void spgemm_copy(cusparseHandle_t handle, const ValueType *alpha, +void spgemm_copy(cusparseHandle_t handle, const ValueType* alpha, cusparseSpMatDescr_t a_descr, cusparseSpMatDescr_t b_descr, - const ValueType *beta, cusparseSpMatDescr_t c_descr, + const ValueType* beta, cusparseSpMatDescr_t c_descr, cusparseSpGEMMDescr_t spgemm_descr) { GKO_ASSERT_NO_CUSPARSE_ERRORS( @@ -568,8 +568,8 @@ inline size_type sparse_matrix_nnz(cusparseSpMatDescr_t descr) template -void csr_set_pointers(cusparseSpMatDescr_t descr, IndexType *row_ptrs, - IndexType *col_idxs, ValueType *vals) +void csr_set_pointers(cusparseSpMatDescr_t descr, IndexType* row_ptrs, + IndexType* col_idxs, ValueType* vals) { cusparseCsrSetPointers(descr, row_ptrs, col_idxs, vals); } @@ -584,8 +584,8 @@ void csr_set_pointers(cusparseSpMatDescr_t descr, IndexType *row_ptrs, #define GKO_BIND_CUSPARSE32_CSR2HYB(ValueType, CusparseName) \ inline void csr2hyb(cusparseHandle_t handle, int32 m, int32 n, \ const cusparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, cusparseHybMat_t hybA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, cusparseHybMat_t hybA, \ int32 userEllWidth, \ cusparseHybPartition_t partitionType) \ { \ @@ -600,8 +600,8 @@ void 
csr_set_pointers(cusparseSpMatDescr_t descr, IndexType *row_ptrs, #define GKO_BIND_CUSPARSE64_CSR2HYB(ValueType, CusparseName) \ inline void csr2hyb( \ cusparseHandle_t handle, int64 m, int64 n, \ - const cusparseMatDescr_t descrA, const ValueType *csrValA, \ - const int64 *csrRowPtrA, const int64 *csrColIndA, \ + const cusparseMatDescr_t descrA, const ValueType* csrValA, \ + const int64* csrRowPtrA, const int64* csrColIndA, \ cusparseHybMat_t hybA, int64 userEllWidth, \ cusparseHybPartition_t partitionType) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ @@ -633,10 +633,10 @@ GKO_BIND_CUSPARSE64_CSR2HYB(ValueType, detail::not_implemented); template inline void transpose(cusparseHandle_t handle, size_type m, size_type n, - size_type nnz, const ValueType *OrigValA, - const IndexType *OrigRowPtrA, - const IndexType *OrigColIndA, ValueType *TransValA, - IndexType *TransRowPtrA, IndexType *TransColIndA, + size_type nnz, const ValueType* OrigValA, + const IndexType* OrigRowPtrA, + const IndexType* OrigColIndA, ValueType* TransValA, + IndexType* TransRowPtrA, IndexType* TransColIndA, cusparseAction_t copyValues, cusparseIndexBase_t idxBase) GKO_NOT_IMPLEMENTED; @@ -646,9 +646,9 @@ inline void transpose(cusparseHandle_t handle, size_type m, size_type n, template <> \ inline void transpose( \ cusparseHandle_t handle, size_type m, size_type n, size_type nnz, \ - const ValueType *OrigValA, const int32 *OrigRowPtrA, \ - const int32 *OrigColIndA, ValueType *TransValA, int32 *TransRowPtrA, \ - int32 *TransColIndA, cusparseAction_t copyValues, \ + const ValueType* OrigValA, const int32* OrigRowPtrA, \ + const int32* OrigColIndA, ValueType* TransValA, int32* TransRowPtrA, \ + int32* TransColIndA, cusparseAction_t copyValues, \ cusparseIndexBase_t idxBase) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ @@ -673,21 +673,21 @@ GKO_BIND_CUSPARSE_TRANSPOSE32(std::complex, cusparseZcsr2csc); template inline void transpose_buffersize( cusparseHandle_t handle, size_type m, size_type n, size_type 
nnz, - const ValueType *OrigValA, const IndexType *OrigRowPtrA, - const IndexType *OrigColIndA, ValueType *TransValA, IndexType *TransRowPtrA, - IndexType *TransColIndA, cudaDataType_t valType, + const ValueType* OrigValA, const IndexType* OrigRowPtrA, + const IndexType* OrigColIndA, ValueType* TransValA, IndexType* TransRowPtrA, + IndexType* TransColIndA, cudaDataType_t valType, cusparseAction_t copyValues, cusparseIndexBase_t idxBase, - cusparseCsr2CscAlg_t alg, size_type *buffer_size) GKO_NOT_IMPLEMENTED; + cusparseCsr2CscAlg_t alg, size_type* buffer_size) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_TRANSPOSE_BUFFERSIZE32(ValueType) \ template <> \ inline void transpose_buffersize( \ cusparseHandle_t handle, size_type m, size_type n, size_type nnz, \ - const ValueType *OrigValA, const int32 *OrigRowPtrA, \ - const int32 *OrigColIndA, ValueType *TransValA, int32 *TransRowPtrA, \ - int32 *TransColIndA, cudaDataType_t valType, \ + const ValueType* OrigValA, const int32* OrigRowPtrA, \ + const int32* OrigColIndA, ValueType* TransValA, int32* TransRowPtrA, \ + int32* TransColIndA, cudaDataType_t valType, \ cusparseAction_t copyValues, cusparseIndexBase_t idxBase, \ - cusparseCsr2CscAlg_t alg, size_type *buffer_size) \ + cusparseCsr2CscAlg_t alg, size_type* buffer_size) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsr2cscEx2_bufferSize( \ handle, m, n, nnz, OrigValA, OrigRowPtrA, OrigColIndA, TransValA, \ @@ -705,23 +705,23 @@ GKO_BIND_CUSPARSE_TRANSPOSE_BUFFERSIZE32(std::complex); template inline void transpose(cusparseHandle_t handle, size_type m, size_type n, - size_type nnz, const ValueType *OrigValA, - const IndexType *OrigRowPtrA, - const IndexType *OrigColIndA, ValueType *TransValA, - IndexType *TransRowPtrA, IndexType *TransColIndA, + size_type nnz, const ValueType* OrigValA, + const IndexType* OrigRowPtrA, + const IndexType* OrigColIndA, ValueType* TransValA, + IndexType* TransRowPtrA, IndexType* TransColIndA, cudaDataType_t valType, cusparseAction_t 
copyValues, cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_TRANSPOSE32(ValueType) \ template <> \ inline void transpose( \ cusparseHandle_t handle, size_type m, size_type n, size_type nnz, \ - const ValueType *OrigValA, const int32 *OrigRowPtrA, \ - const int32 *OrigColIndA, ValueType *TransValA, int32 *TransRowPtrA, \ - int32 *TransColIndA, cudaDataType_t valType, \ + const ValueType* OrigValA, const int32* OrigRowPtrA, \ + const int32* OrigColIndA, ValueType* TransValA, int32* TransRowPtrA, \ + int32* TransColIndA, cudaDataType_t valType, \ cusparseAction_t copyValues, cusparseIndexBase_t idxBase, \ - cusparseCsr2CscAlg_t alg, void *buffer) \ + cusparseCsr2CscAlg_t alg, void* buffer) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCsr2cscEx2( \ handle, m, n, nnz, OrigValA, OrigRowPtrA, OrigColIndA, TransValA, \ @@ -790,7 +790,7 @@ inline void destroy(cusparseSpGEMMDescr_t info) template -inline cusparseDnVecDescr_t create_dnvec(int64_t size, ValueType *values) +inline cusparseDnVecDescr_t create_dnvec(int64_t size, ValueType* values) { cusparseDnVecDescr_t descr{}; constexpr auto value_type = cuda_data_type(); @@ -808,7 +808,7 @@ inline void destroy(cusparseDnVecDescr_t descr) template inline cusparseSpVecDescr_t create_spvec(int64_t size, int64_t nnz, - IndexType *indices, ValueType *values) + IndexType* indices, ValueType* values) { cusparseSpVecDescr_t descr{}; constexpr auto index_type = cusparse_index_type(); @@ -828,9 +828,9 @@ inline void destroy(cusparseSpVecDescr_t descr) template inline cusparseSpMatDescr_t create_csr(int64_t rows, int64_t cols, int64_t nnz, - IndexType *csrRowOffsets, - IndexType *csrColInd, - ValueType *csrValues) + IndexType* csrRowOffsets, + IndexType* csrColInd, + ValueType* csrValues) { cusparseSpMatDescr_t descr{}; constexpr auto index_type = cusparse_index_type(); @@ -925,11 +925,11 @@ inline void destroy(csric02Info_t info) 
inline void buffer_size_ext( \ cusparseHandle_t handle, int algo, cusparseOperation_t trans1, \ cusparseOperation_t trans2, size_type m, size_type n, size_type nnz, \ - const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, const ValueType *rhs, int32 sol_size, \ + const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, const ValueType* rhs, int32 sol_size, \ csrsm2Info_t factor_info, cusparseSolvePolicy_t policy, \ - size_type *factor_work_size) \ + size_type* factor_work_size) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, algo, trans1, trans2, m, n, nnz, \ @@ -945,11 +945,11 @@ inline void destroy(csric02Info_t info) inline void buffer_size_ext( \ cusparseHandle_t handle, int algo, cusparseOperation_t trans1, \ cusparseOperation_t trans2, size_type m, size_type n, size_type nnz, \ - const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, const ValueType *rhs, int64 sol_size, \ + const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, const ValueType* rhs, int64 sol_size, \ csrsm2Info_t factor_info, cusparseSolvePolicy_t policy, \ - size_type *factor_work_size) GKO_NOT_IMPLEMENTED; \ + size_type* factor_work_size) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -978,11 +978,11 @@ GKO_BIND_CUSPARSE64_BUFFERSIZEEXT(ValueType, detail::not_implemented); inline void csrsm2_analysis( \ cusparseHandle_t handle, int algo, cusparseOperation_t trans1, \ cusparseOperation_t trans2, size_type m, size_type n, size_type nnz, \ - const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, const 
ValueType *rhs, int32 sol_size, \ + const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, const ValueType* rhs, int32 sol_size, \ csrsm2Info_t factor_info, cusparseSolvePolicy_t policy, \ - void *factor_work_vec) \ + void* factor_work_vec) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, algo, trans1, trans2, m, n, nnz, \ @@ -998,11 +998,11 @@ GKO_BIND_CUSPARSE64_BUFFERSIZEEXT(ValueType, detail::not_implemented); inline void csrsm2_analysis( \ cusparseHandle_t handle, int algo, cusparseOperation_t trans1, \ cusparseOperation_t trans2, size_type m, size_type n, size_type nnz, \ - const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, const ValueType *rhs, int64 sol_size, \ + const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, const ValueType* rhs, int64 sol_size, \ csrsm2Info_t factor_info, cusparseSolvePolicy_t policy, \ - void *factor_work_vec) GKO_NOT_IMPLEMENTED; \ + void* factor_work_vec) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -1031,11 +1031,11 @@ GKO_BIND_CUSPARSE64_CSRSM2_ANALYSIS(ValueType, detail::not_implemented); inline void csrsm2_solve( \ cusparseHandle_t handle, int algo, cusparseOperation_t trans1, \ cusparseOperation_t trans2, size_type m, size_type n, size_type nnz, \ - const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, ValueType *rhs, int32 sol_stride, \ + const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, ValueType* rhs, int32 sol_stride, \ csrsm2Info_t factor_info, cusparseSolvePolicy_t policy, \ - void *factor_work_vec) \ + void* 
factor_work_vec) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, algo, trans1, trans2, m, n, nnz, \ @@ -1051,11 +1051,11 @@ GKO_BIND_CUSPARSE64_CSRSM2_ANALYSIS(ValueType, detail::not_implemented); inline void csrsm2_solve( \ cusparseHandle_t handle, int algo, cusparseOperation_t trans1, \ cusparseOperation_t trans2, size_type m, size_type n, size_type nnz, \ - const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, ValueType *rhs, int64 sol_stride, \ + const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, ValueType* rhs, int64 sol_stride, \ csrsm2Info_t factor_info, cusparseSolvePolicy_t policy, \ - void *factor_work_vec) GKO_NOT_IMPLEMENTED; \ + void* factor_work_vec) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -1084,8 +1084,8 @@ GKO_BIND_CUSPARSE64_CSRSM2_SOLVE(ValueType, detail::not_implemented); inline void csrsm_analysis( \ cusparseHandle_t handle, cusparseOperation_t trans, size_type m, \ size_type nnz, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, cusparseSolveAnalysisInfo_t factor_info) \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, cusparseSolveAnalysisInfo_t factor_info) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, trans, m, nnz, descr, as_culibs_type(csrVal), \ @@ -1099,8 +1099,8 @@ GKO_BIND_CUSPARSE64_CSRSM2_SOLVE(ValueType, detail::not_implemented); inline void csrsm_analysis( \ cusparseHandle_t handle, cusparseOperation_t trans, size_type m, \ size_type nnz, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, cusparseSolveAnalysisInfo_t factor_info) \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* 
csrColInd, cusparseSolveAnalysisInfo_t factor_info) \ GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the " \ @@ -1129,10 +1129,10 @@ GKO_BIND_CUSPARSE64_CSRSM_ANALYSIS(ValueType, detail::not_implemented); #define GKO_BIND_CUSPARSE32_CSRSM_SOLVE(ValueType, CusparseName) \ inline void csrsm_solve( \ cusparseHandle_t handle, cusparseOperation_t trans, size_type m, \ - size_type n, const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, cusparseSolveAnalysisInfo_t factor_info, \ - const ValueType *rhs, int32 rhs_stride, ValueType *sol, \ + size_type n, const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, cusparseSolveAnalysisInfo_t factor_info, \ + const ValueType* rhs, int32 rhs_stride, ValueType* sol, \ int32 sol_stride) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ @@ -1148,10 +1148,10 @@ GKO_BIND_CUSPARSE64_CSRSM_ANALYSIS(ValueType, detail::not_implemented); #define GKO_BIND_CUSPARSE64_CSRSM_SOLVE(ValueType, CusparseName) \ inline void csrsm_solve( \ cusparseHandle_t handle, cusparseOperation_t trans1, size_type m, \ - size_type n, const ValueType *one, const cusparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, cusparseSolveAnalysisInfo_t factor_info, \ - const ValueType *rhs, int64 rhs_stride, ValueType *sol, \ + size_type n, const ValueType* one, const cusparseMatDescr_t descr, \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, cusparseSolveAnalysisInfo_t factor_info, \ + const ValueType* rhs, int64 rhs_stride, ValueType* sol, \ int64 sol_stride) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -1178,11 +1178,11 @@ GKO_BIND_CUSPARSE64_CSRSM_SOLVE(ValueType, detail::not_implemented); template void create_identity_permutation(cusparseHandle_t 
handle, IndexType size, - IndexType *permutation) GKO_NOT_IMPLEMENTED; + IndexType* permutation) GKO_NOT_IMPLEMENTED; template <> inline void create_identity_permutation(cusparseHandle_t handle, - int32 size, int32 *permutation) + int32 size, int32* permutation) { GKO_ASSERT_NO_CUSPARSE_ERRORS( cusparseCreateIdentityPermutation(handle, size, permutation)); @@ -1191,16 +1191,16 @@ inline void create_identity_permutation(cusparseHandle_t handle, template void csrsort_buffer_size(cusparseHandle_t handle, IndexType m, IndexType n, - IndexType nnz, const IndexType *row_ptrs, - const IndexType *col_idxs, - size_type &buffer_size) GKO_NOT_IMPLEMENTED; + IndexType nnz, const IndexType* row_ptrs, + const IndexType* col_idxs, + size_type& buffer_size) GKO_NOT_IMPLEMENTED; template <> inline void csrsort_buffer_size(cusparseHandle_t handle, int32 m, int32 n, int32 nnz, - const int32 *row_ptrs, - const int32 *col_idxs, - size_type &buffer_size) + const int32* row_ptrs, + const int32* col_idxs, + size_type& buffer_size) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseXcsrsort_bufferSizeExt( handle, m, n, nnz, row_ptrs, col_idxs, &buffer_size)); @@ -1209,15 +1209,15 @@ inline void csrsort_buffer_size(cusparseHandle_t handle, int32 m, template void csrsort(cusparseHandle_t handle, IndexType m, IndexType n, IndexType nnz, - const cusparseMatDescr_t descr, const IndexType *row_ptrs, - IndexType *col_idxs, IndexType *permutation, - void *buffer) GKO_NOT_IMPLEMENTED; + const cusparseMatDescr_t descr, const IndexType* row_ptrs, + IndexType* col_idxs, IndexType* permutation, + void* buffer) GKO_NOT_IMPLEMENTED; template <> inline void csrsort(cusparseHandle_t handle, int32 m, int32 n, int32 nnz, const cusparseMatDescr_t descr, - const int32 *row_ptrs, int32 *col_idxs, - int32 *permutation, void *buffer) + const int32* row_ptrs, int32* col_idxs, + int32* permutation, void* buffer) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseXcsrsort( handle, m, n, nnz, descr, row_ptrs, col_idxs, permutation, 
buffer)); @@ -1228,14 +1228,14 @@ inline void csrsort(cusparseHandle_t handle, int32 m, int32 n, int32 nnz, template -void gather(cusparseHandle_t handle, IndexType nnz, const ValueType *in, - ValueType *out, const IndexType *permutation) GKO_NOT_IMPLEMENTED; +void gather(cusparseHandle_t handle, IndexType nnz, const ValueType* in, + ValueType* out, const IndexType* permutation) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_GATHER(ValueType, CusparseName) \ template <> \ inline void gather(cusparseHandle_t handle, int32 nnz, \ - const ValueType *in, ValueType *out, \ - const int32 *permutation) \ + const ValueType* in, ValueType* out, \ + const int32* permutation) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, nnz, as_culibs_type(in), as_culibs_type(out), \ @@ -1268,23 +1268,23 @@ inline void gather(cusparseHandle_t handle, cusparseDnVecDescr_t in, template void ilu0_buffer_size(cusparseHandle_t handle, IndexType m, IndexType nnz, - const cusparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const cusparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csrilu02Info_t info, - size_type &buffer_size) GKO_NOT_IMPLEMENTED; + size_type& buffer_size) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_ILU0_BUFFER_SIZE(ValueType, CusparseName) \ template <> \ inline void ilu0_buffer_size( \ cusparseHandle_t handle, int32 m, int32 nnz, \ - const cusparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csrilu02Info_t info, \ - size_type &buffer_size) \ + const cusparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csrilu02Info_t info, \ + size_type& buffer_size) \ { \ int tmp_buffer_size{}; \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, nnz, descr, \ - as_culibs_type(const_cast(vals)), \ + as_culibs_type(const_cast(vals)), \ row_ptrs, col_idxs, info, 
&tmp_buffer_size)); \ buffer_size = tmp_buffer_size; \ } \ @@ -1304,18 +1304,18 @@ GKO_BIND_CUSPARSE_ILU0_BUFFER_SIZE(std::complex, template void ilu0_analysis(cusparseHandle_t handle, IndexType m, IndexType nnz, - const cusparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const cusparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csrilu02Info_t info, cusparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_ILU0_ANALYSIS(ValueType, CusparseName) \ template <> \ inline void ilu0_analysis( \ cusparseHandle_t handle, int32 m, int32 nnz, \ - const cusparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csrilu02Info_t info, \ - cusparseSolvePolicy_t policy, void *buffer) \ + const cusparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csrilu02Info_t info, \ + cusparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, nnz, descr, as_culibs_type(vals), \ @@ -1337,18 +1337,18 @@ GKO_BIND_CUSPARSE_ILU0_ANALYSIS(std::complex, template void ilu0(cusparseHandle_t handle, IndexType m, IndexType nnz, - const cusparseMatDescr_t descr, ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const cusparseMatDescr_t descr, ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csrilu02Info_t info, cusparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_ILU0(ValueType, CusparseName) \ template <> \ inline void ilu0( \ cusparseHandle_t handle, int32 m, int32 nnz, \ - const cusparseMatDescr_t descr, ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csrilu02Info_t info, \ - cusparseSolvePolicy_t policy, void *buffer) \ + const cusparseMatDescr_t descr, 
ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csrilu02Info_t info, \ + cusparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, nnz, descr, as_culibs_type(vals), \ @@ -1368,23 +1368,23 @@ GKO_BIND_CUSPARSE_ILU0(std::complex, cusparseZcsrilu02); template void ic0_buffer_size(cusparseHandle_t handle, IndexType m, IndexType nnz, - const cusparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const cusparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csric02Info_t info, - size_type &buffer_size) GKO_NOT_IMPLEMENTED; + size_type& buffer_size) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(ValueType, CusparseName) \ template <> \ inline void ic0_buffer_size( \ cusparseHandle_t handle, int32 m, int32 nnz, \ - const cusparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ - size_type &buffer_size) \ + const cusparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csric02Info_t info, \ + size_type& buffer_size) \ { \ int tmp_buffer_size{}; \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, nnz, descr, \ - as_culibs_type(const_cast(vals)), \ + as_culibs_type(const_cast(vals)), \ row_ptrs, col_idxs, info, &tmp_buffer_size)); \ buffer_size = tmp_buffer_size; \ } \ @@ -1404,18 +1404,18 @@ GKO_BIND_CUSPARSE_IC0_BUFFER_SIZE(std::complex, template void ic0_analysis(cusparseHandle_t handle, IndexType m, IndexType nnz, - const cusparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const cusparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csric02Info_t info, cusparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define 
GKO_BIND_CUSPARSE_IC0_ANALYSIS(ValueType, CusparseName) \ template <> \ inline void ic0_analysis( \ cusparseHandle_t handle, int32 m, int32 nnz, \ - const cusparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ - cusparseSolvePolicy_t policy, void *buffer) \ + const cusparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csric02Info_t info, \ + cusparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, nnz, descr, as_culibs_type(vals), \ @@ -1435,18 +1435,18 @@ GKO_BIND_CUSPARSE_IC0_ANALYSIS(std::complex, cusparseZcsric02_analysis); template void ic0(cusparseHandle_t handle, IndexType m, IndexType nnz, - const cusparseMatDescr_t descr, ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const cusparseMatDescr_t descr, ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csric02Info_t info, cusparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_CUSPARSE_IC0(ValueType, CusparseName) \ template <> \ inline void ic0( \ cusparseHandle_t handle, int32 m, int32 nnz, \ - const cusparseMatDescr_t descr, ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ - cusparseSolvePolicy_t policy, void *buffer) \ + const cusparseMatDescr_t descr, ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csric02Info_t info, \ + cusparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_CUSPARSE_ERRORS( \ CusparseName(handle, m, nnz, descr, as_culibs_type(vals), \ diff --git a/cuda/base/device_guard.hpp b/cuda/base/device_guard.hpp index 63928671953..3bf9f1eac5d 100644 --- a/cuda/base/device_guard.hpp +++ b/cuda/base/device_guard.hpp @@ -63,13 +63,13 @@ class device_guard { GKO_ASSERT_NO_CUDA_ERRORS(cudaSetDevice(device_id)); } - device_guard(device_guard &other) = delete; + 
device_guard(device_guard& other) = delete; - device_guard &operator=(const device_guard &other) = delete; + device_guard& operator=(const device_guard& other) = delete; - device_guard(device_guard &&other) = delete; + device_guard(device_guard&& other) = delete; - device_guard const &operator=(device_guard &&other) = delete; + device_guard const& operator=(device_guard&& other) = delete; ~device_guard() noexcept(false) { diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index b256cb93a2c..0b74fdd755e 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -63,12 +63,12 @@ std::shared_ptr CudaExecutor::create( return std::shared_ptr( new CudaExecutor(device_id, std::move(master), device_reset, alloc_mode), - [device_id](CudaExecutor *exec) { + [device_id](CudaExecutor* exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( nvidia_device::get_mutex(device_id)); delete exec; - auto &num_execs = nvidia_device::get_num_execs(device_id); + auto& num_execs = nvidia_device::get_num_execs(device_id); num_execs--; if (!num_execs && device_reset) { cuda::device_guard g(device_id); @@ -78,7 +78,7 @@ std::shared_ptr CudaExecutor::create( } -void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo) +void CudaExecutor::populate_exec_info(const MachineTopology* mach_topo) { if (this->get_device_id() < this->get_num_devices() && this->get_device_id() >= 0) { @@ -98,8 +98,8 @@ void CudaExecutor::populate_exec_info(const MachineTopology *mach_topo) } -void OmpExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { cuda::device_guard g(dest->get_device_id()); @@ -109,7 +109,7 @@ void OmpExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, } -void CudaExecutor::raw_free(void *ptr) const noexcept +void 
CudaExecutor::raw_free(void* ptr) const noexcept { cuda::device_guard g(this->get_device_id()); auto error_code = cudaFree(ptr); @@ -127,9 +127,9 @@ void CudaExecutor::raw_free(void *ptr) const noexcept } -void *CudaExecutor::raw_alloc(size_type num_bytes) const +void* CudaExecutor::raw_alloc(size_type num_bytes) const { - void *dev_ptr = nullptr; + void* dev_ptr = nullptr; cuda::device_guard g(this->get_device_id()); int error_code = 0; if (this->alloc_mode_ == allocation_mode::unified_host) { @@ -150,8 +150,8 @@ void *CudaExecutor::raw_alloc(size_type num_bytes) const } -void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { cuda::device_guard g(this->get_device_id()); @@ -161,8 +161,8 @@ void CudaExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, } -void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { #if GINKGO_HIP_PLATFORM_NVCC == 1 if (num_bytes > 0) { @@ -177,15 +177,15 @@ void CudaExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, } -void CudaExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { GKO_NOT_SUPPORTED(dest); } -void CudaExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void CudaExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { cuda::device_guard g(this->get_device_id()); @@ -203,7 +203,7 @@ 
void CudaExecutor::synchronize() const } -void CudaExecutor::run(const Operation &op) const +void CudaExecutor::run(const Operation& op) const { this->template log(this, &op); cuda::device_guard g(this->get_device_id()); diff --git a/cuda/base/kernel_launch.cuh b/cuda/base/kernel_launch.cuh index 50650057ca6..d55faed5053 100644 --- a/cuda/base/kernel_launch.cuh +++ b/cuda/base/kernel_launch.cuh @@ -77,7 +77,7 @@ __global__ __launch_bounds__(default_block_size) void generic_kernel_2d( template void run_kernel(std::shared_ptr exec, KernelFunction fn, - size_type size, KernelArgs &&... args) + size_type size, KernelArgs&&... args) { gko::cuda::device_guard guard{exec->get_device_id()}; constexpr auto block_size = default_block_size; @@ -88,7 +88,7 @@ void run_kernel(std::shared_ptr exec, KernelFunction fn, template void run_kernel(std::shared_ptr exec, KernelFunction fn, - dim<2> size, KernelArgs &&... args) + dim<2> size, KernelArgs&&... args) { gko::cuda::device_guard guard{exec->get_device_id()}; constexpr auto block_size = default_block_size; diff --git a/cuda/base/kernel_launch_solver.cuh b/cuda/base/kernel_launch_solver.cuh index 0dcfe21aaa6..bf2f6e1a995 100644 --- a/cuda/base/kernel_launch_solver.cuh +++ b/cuda/base/kernel_launch_solver.cuh @@ -60,7 +60,7 @@ __global__ __launch_bounds__(default_block_size) void generic_kernel_2d_solver( template void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, - KernelArgs &&... args) + KernelArgs&&... 
args) { gko::cuda::device_guard guard{exec->get_device_id()}; constexpr auto block_size = default_block_size; diff --git a/cuda/base/pointer_mode_guard.hpp b/cuda/base/pointer_mode_guard.hpp index 72ac4f372d7..34923d21abf 100644 --- a/cuda/base/pointer_mode_guard.hpp +++ b/cuda/base/pointer_mode_guard.hpp @@ -61,20 +61,20 @@ namespace cublas { */ class pointer_mode_guard { public: - pointer_mode_guard(cublasHandle_t &handle) + pointer_mode_guard(cublasHandle_t& handle) { l_handle = &handle; GKO_ASSERT_NO_CUBLAS_ERRORS( cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST)); } - pointer_mode_guard(pointer_mode_guard &other) = delete; + pointer_mode_guard(pointer_mode_guard& other) = delete; - pointer_mode_guard &operator=(const pointer_mode_guard &other) = delete; + pointer_mode_guard& operator=(const pointer_mode_guard& other) = delete; - pointer_mode_guard(pointer_mode_guard &&other) = delete; + pointer_mode_guard(pointer_mode_guard&& other) = delete; - pointer_mode_guard const &operator=(pointer_mode_guard &&other) = delete; + pointer_mode_guard const& operator=(pointer_mode_guard&& other) = delete; ~pointer_mode_guard() noexcept(false) { @@ -88,7 +88,7 @@ class pointer_mode_guard { } private: - cublasHandle_t *l_handle; + cublasHandle_t* l_handle; }; @@ -108,20 +108,20 @@ namespace cusparse { */ class pointer_mode_guard { public: - pointer_mode_guard(cusparseHandle_t &handle) + pointer_mode_guard(cusparseHandle_t& handle) { l_handle = &handle; GKO_ASSERT_NO_CUSPARSE_ERRORS( cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST)); } - pointer_mode_guard(pointer_mode_guard &other) = delete; + pointer_mode_guard(pointer_mode_guard& other) = delete; - pointer_mode_guard &operator=(const pointer_mode_guard &other) = delete; + pointer_mode_guard& operator=(const pointer_mode_guard& other) = delete; - pointer_mode_guard(pointer_mode_guard &&other) = delete; + pointer_mode_guard(pointer_mode_guard&& other) = delete; - pointer_mode_guard const 
&operator=(pointer_mode_guard &&other) = delete; + pointer_mode_guard const& operator=(pointer_mode_guard&& other) = delete; ~pointer_mode_guard() noexcept(false) { @@ -135,7 +135,7 @@ class pointer_mode_guard { } private: - cusparseHandle_t *l_handle; + cusparseHandle_t* l_handle; }; diff --git a/cuda/base/types.hpp b/cuda/base/types.hpp index 54309b3ecaa..c79e7a10dbb 100644 --- a/cuda/base/types.hpp +++ b/cuda/base/types.hpp @@ -62,13 +62,13 @@ struct culibs_type_impl { }; template -struct culibs_type_impl { - using type = typename culibs_type_impl::type *; +struct culibs_type_impl { + using type = typename culibs_type_impl::type*; }; template -struct culibs_type_impl { - using type = typename culibs_type_impl::type &; +struct culibs_type_impl { + using type = typename culibs_type_impl::type&; }; template @@ -102,13 +102,13 @@ struct cuda_type_impl { }; template -struct cuda_type_impl { - using type = typename cuda_type_impl::type *; +struct cuda_type_impl { + using type = typename cuda_type_impl::type*; }; template -struct cuda_type_impl { - using type = typename cuda_type_impl::type &; +struct cuda_type_impl { + using type = typename cuda_type_impl::type&; }; template @@ -301,7 +301,7 @@ inline std::enable_if_t< !std::is_pointer::value && !std::is_reference::value, cuda_type> as_cuda_type(T val) { - return *reinterpret_cast *>(&val); + return *reinterpret_cast*>(&val); } diff --git a/cuda/components/absolute_array.cu b/cuda/components/absolute_array.cu index 2eb62b229a7..1c26dab9640 100644 --- a/cuda/components/absolute_array.cu +++ b/cuda/components/absolute_array.cu @@ -51,7 +51,7 @@ constexpr int default_block_size = 512; template void inplace_absolute_array(std::shared_ptr exec, - ValueType *data, size_type n) + ValueType* data, size_type n) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); @@ -64,8 +64,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); template void 
outplace_absolute_array(std::shared_ptr exec, - const ValueType *in, size_type n, - remove_complex *out) + const ValueType* in, size_type n, + remove_complex* out) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); diff --git a/cuda/components/atomic.cuh b/cuda/components/atomic.cuh index 243a3ab507f..da954fde178 100644 --- a/cuda/components/atomic.cuh +++ b/cuda/components/atomic.cuh @@ -55,9 +55,9 @@ namespace cuda { * @note It is not 'real' complex atomic add operation */ __forceinline__ __device__ thrust::complex atomic_add( - thrust::complex *__restrict__ address, thrust::complex val) + thrust::complex* __restrict__ address, thrust::complex val) { - cuComplex *addr = reinterpret_cast(address); + cuComplex* addr = reinterpret_cast(address); // Separate to real part and imag part auto real = atomic_add(&(addr->x), val.real()); auto imag = atomic_add(&(addr->y), val.imag()); @@ -71,9 +71,9 @@ __forceinline__ __device__ thrust::complex atomic_add( * @note It is not 'real' complex atomic add operation */ __forceinline__ __device__ thrust::complex atomic_add( - thrust::complex *__restrict__ address, thrust::complex val) + thrust::complex* __restrict__ address, thrust::complex val) { - cuDoubleComplex *addr = reinterpret_cast(address); + cuDoubleComplex* addr = reinterpret_cast(address); // Separate to real part and imag part auto real = atomic_add(&(addr->x), val.real()); auto imag = atomic_add(&(addr->y), val.imag()); diff --git a/cuda/components/cooperative_groups.cuh b/cuda/components/cooperative_groups.cuh index 4e2305966b0..ebee68b54d1 100644 --- a/cuda/components/cooperative_groups.cuh +++ b/cuda/components/cooperative_groups.cuh @@ -328,12 +328,12 @@ public: #define GKO_ENABLE_SHUFFLE_OPERATION(_name, SelectorType) \ template \ - __device__ __forceinline__ ValueType _name(const ValueType &var, \ + __device__ __forceinline__ ValueType _name(const ValueType& var, \ SelectorType selector) const \ { \ return 
shuffle_impl( \ [this](uint32 v, SelectorType s) { \ - return static_cast(this)->_name(v, s); \ + return static_cast(this)->_name(v, s); \ }, \ var, selector); \ } @@ -356,8 +356,8 @@ private: "Unable to shuffle sizes which are not 4-byte multiples"); constexpr auto value_size = sizeof(ValueType) / sizeof(uint32); ValueType result; - auto var_array = reinterpret_cast(&var); - auto result_array = reinterpret_cast(&result); + auto var_array = reinterpret_cast(&var); + auto result_array = reinterpret_cast(&result); #pragma unroll for (std::size_t i = 0; i < value_size; ++i) { result_array[i] = intrinsic_shuffle(var_array[i], selector); @@ -491,7 +491,7 @@ using cooperative_groups::group_size; // Need to implement our own tiled_partition functions to make sure they return // our extended version of the thread_block_tile in the templated case. template -__device__ __forceinline__ auto tiled_partition(const Group &g) +__device__ __forceinline__ auto tiled_partition(const Group& g) -> decltype(cooperative_groups::tiled_partition(g)) { return cooperative_groups::tiled_partition(g); @@ -510,7 +510,7 @@ __device__ __forceinline__ std::enable_if_t<(Size <= kernels::cuda::config::warp_size) && (Size > 0) && (kernels::cuda::config::warp_size % Size == 0), thread_block_tile> - tiled_partition(const Group &) + tiled_partition(const Group&) { return thread_block_tile(); } @@ -524,7 +524,7 @@ __device__ __forceinline__ // parent group type. 
template __device__ __forceinline__ thread_block_tile tiled_partition( - const Group &g) + const Group& g) { return cooperative_groups::tiled_partition(g); } diff --git a/cuda/components/fill_array.cu b/cuda/components/fill_array.cu index fd5b69b54bc..e206994a00b 100644 --- a/cuda/components/fill_array.cu +++ b/cuda/components/fill_array.cu @@ -50,7 +50,7 @@ constexpr int default_block_size = 512; template -void fill_array(std::shared_ptr exec, ValueType *array, +void fill_array(std::shared_ptr exec, ValueType* array, size_type n, ValueType val) { const dim3 block_size(default_block_size, 1, 1); @@ -64,7 +64,7 @@ GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); template void fill_seq_array(std::shared_ptr exec, - ValueType *array, size_type n) + ValueType* array, size_type n) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); diff --git a/cuda/components/format_conversion.cuh b/cuda/components/format_conversion.cuh index 3586627451d..25957e0d578 100644 --- a/cuda/components/format_conversion.cuh +++ b/cuda/components/format_conversion.cuh @@ -62,8 +62,8 @@ namespace kernel { template __global__ void count_nnz_per_row(size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *__restrict__ values, - IndexType *__restrict__ result); + const ValueType* __restrict__ values, + IndexType* __restrict__ result); } // namespace kernel @@ -80,9 +80,9 @@ namespace kernel { * It converts the row index of Coo to the row pointer of Csr. 
*/ template -__global__ void convert_row_idxs_to_ptrs(const IndexType *__restrict__ idxs, +__global__ void convert_row_idxs_to_ptrs(const IndexType* __restrict__ idxs, size_type num_nonzeros, - IndexType *__restrict__ ptrs, + IndexType* __restrict__ ptrs, size_type length); diff --git a/cuda/components/prefix_sum.cu b/cuda/components/prefix_sum.cu index ce108fa8cf9..3d23dceafae 100644 --- a/cuda/components/prefix_sum.cu +++ b/cuda/components/prefix_sum.cu @@ -46,7 +46,7 @@ constexpr int prefix_sum_block_size = 512; template -void prefix_sum(std::shared_ptr exec, IndexType *counts, +void prefix_sum(std::shared_ptr exec, IndexType* counts, size_type num_entries) { // prefix_sum should only be performed on a valid array diff --git a/cuda/components/reduction.cuh b/cuda/components/reduction.cuh index 9aace4fe6bd..95ac3d8a417 100644 --- a/cuda/components/reduction.cuh +++ b/cuda/components/reduction.cuh @@ -70,7 +70,7 @@ constexpr int default_block_size = 512; */ template __host__ ValueType reduce_add_array(std::shared_ptr exec, - size_type size, const ValueType *source) + size_type size, const ValueType* source) { auto block_results_val = source; size_type grid_dim = size; diff --git a/cuda/factorization/factorization_kernels.cu b/cuda/factorization/factorization_kernels.cu index 065d59f892a..9370c6ce503 100644 --- a/cuda/factorization/factorization_kernels.cu +++ b/cuda/factorization/factorization_kernels.cu @@ -65,7 +65,7 @@ constexpr int default_block_size{512}; template void add_diagonal_elements(std::shared_ptr exec, - matrix::Csr *mtx, + matrix::Csr* mtx, bool is_sorted) { // TODO: Runtime can be optimized by choosing a appropriate size for the @@ -145,8 +145,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l_u( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs, IndexType *u_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs, IndexType* u_row_ptrs) { const size_type 
num_rows{system_matrix->get_size()[0]}; @@ -171,9 +171,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l_u(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, - matrix::Csr *csr_u) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, + matrix::Csr* csr_u) { const size_type num_rows{system_matrix->get_size()[0]}; const dim3 block_size{default_block_size, 1, 1}; @@ -198,8 +198,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs) { const size_type num_rows{system_matrix->get_size()[0]}; @@ -223,8 +223,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, bool diag_sqrt) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, bool diag_sqrt) { const size_type num_rows{system_matrix->get_size()[0]}; const dim3 block_size{default_block_size, 1, 1}; diff --git a/cuda/factorization/ic_kernels.cu b/cuda/factorization/ic_kernels.cu index 9eb5c906cfb..7c62f8be83b 100644 --- a/cuda/factorization/ic_kernels.cu +++ b/cuda/factorization/ic_kernels.cu @@ -53,7 +53,7 @@ namespace ic_factorization { template void compute(std::shared_ptr exec, - matrix::Csr *m) + matrix::Csr* m) { const auto id = exec->get_device_id(); auto handle = exec->get_cusparse_handle(); diff --git a/cuda/factorization/ilu_kernels.cu b/cuda/factorization/ilu_kernels.cu index 954843ebf1e..5866d7630b4 100644 --- a/cuda/factorization/ilu_kernels.cu +++ b/cuda/factorization/ilu_kernels.cu @@ -53,7 +53,7 @@ namespace ilu_factorization { template void compute_lu(std::shared_ptr exec, - matrix::Csr *m) + matrix::Csr* m) { const auto id = exec->get_device_id(); auto handle = exec->get_cusparse_handle(); diff --git a/cuda/factorization/par_ic_kernels.cu 
b/cuda/factorization/par_ic_kernels.cu index 957eca09932..31bf4edce8d 100644 --- a/cuda/factorization/par_ic_kernels.cu +++ b/cuda/factorization/par_ic_kernels.cu @@ -67,7 +67,7 @@ using compiled_kernels = template void init_factor(std::shared_ptr exec, - matrix::Csr *l) + matrix::Csr* l) { auto num_rows = l->get_size()[0]; auto num_blocks = ceildiv(num_rows, default_block_size); @@ -84,8 +84,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, size_type iterations, - const matrix::Coo *a_lower, - matrix::Csr *l) + const matrix::Coo* a_lower, + matrix::Csr* l) { auto nnz = l->get_num_stored_elements(); auto num_blocks = ceildiv(nnz, default_block_size); diff --git a/cuda/factorization/par_ict_kernels.cu b/cuda/factorization/par_ict_kernels.cu index fe44da01632..84d7a22acb6 100644 --- a/cuda/factorization/par_ict_kernels.cu +++ b/cuda/factorization/par_ict_kernels.cu @@ -83,10 +83,10 @@ namespace { template void add_candidates(syn::value_list, std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) { auto num_rows = static_cast(llh->get_size()[0]); auto subwarps_per_block = default_block_size / subwarp_size; @@ -135,9 +135,9 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_add_candidates, add_candidates); template void compute_factor(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo) { auto total_nnz = static_cast(l->get_num_stored_elements()); auto block_size = default_block_size / subwarp_size; @@ -159,10 +159,10 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_factor, compute_factor); template void add_candidates(std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + 
const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) { auto num_rows = a->get_size()[0]; auto total_nnz = @@ -183,9 +183,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo) { auto num_rows = a->get_size()[0]; auto total_nnz = 2 * l->get_num_stored_elements(); diff --git a/cuda/factorization/par_ilu_kernels.cu b/cuda/factorization/par_ilu_kernels.cu index 80a0125bae2..7476bb60d35 100644 --- a/cuda/factorization/par_ilu_kernels.cu +++ b/cuda/factorization/par_ilu_kernels.cu @@ -61,9 +61,9 @@ constexpr int default_block_size{512}; template void compute_l_u_factors(std::shared_ptr exec, size_type iterations, - const matrix::Coo *system_matrix, - matrix::Csr *l_factor, - matrix::Csr *u_factor) + const matrix::Coo* system_matrix, + matrix::Csr* l_factor, + matrix::Csr* u_factor) { iterations = (iterations == 0) ? 
10 : iterations; const auto num_elements = system_matrix->get_num_stored_elements(); diff --git a/cuda/factorization/par_ilut_approx_filter_kernel.cu b/cuda/factorization/par_ilut_approx_filter_kernel.cu index 1a286c98447..c19d4381306 100644 --- a/cuda/factorization/par_ilut_approx_filter_kernel.cu +++ b/cuda/factorization/par_ilut_approx_filter_kernel.cu @@ -83,11 +83,11 @@ using compiled_kernels = template void threshold_filter_approx(syn::value_list, std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array *tmp, - remove_complex *threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo) + const matrix::Csr* m, + IndexType rank, Array* tmp, + remove_complex* threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) { auto values = m->get_const_values(); IndexType size = m->get_num_stored_elements(); @@ -108,14 +108,14 @@ void threshold_filter_approx(syn::value_list, tmp_size_totals + tmp_size_partials + tmp_size_oracles + tmp_size_tree; tmp->resize_and_reset(tmp_size); - auto total_counts = reinterpret_cast(tmp->get_data()); + auto total_counts = reinterpret_cast(tmp->get_data()); auto partial_counts = - reinterpret_cast(tmp->get_data() + tmp_size_totals); - auto oracles = reinterpret_cast( + reinterpret_cast(tmp->get_data() + tmp_size_totals); + auto oracles = reinterpret_cast( tmp->get_data() + tmp_size_totals + tmp_size_partials); auto tree = - reinterpret_cast(tmp->get_data() + tmp_size_totals + - tmp_size_partials + tmp_size_oracles); + reinterpret_cast(tmp->get_data() + tmp_size_totals + + tmp_size_partials + tmp_size_oracles); sampleselect_count(exec, values, size, tree, oracles, partial_counts, total_counts); @@ -153,7 +153,7 @@ void threshold_filter_approx(syn::value_list, builder.get_value_array().resize_and_reset(new_nnz); auto new_col_idxs = m_out->get_col_idxs(); auto new_vals = m_out->get_values(); - IndexType *new_row_idxs{}; + IndexType* new_row_idxs{}; if (m_out_coo) { matrix::CooBuilder coo_builder{m_out_coo}; 
coo_builder.get_row_idx_array().resize_and_reset(new_nnz); @@ -176,11 +176,11 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_threshold_filter_approx, template void threshold_filter_approx(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - remove_complex &threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo) + const matrix::Csr* m, + IndexType rank, Array& tmp, + remove_complex& threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) { auto num_rows = m->get_size()[0]; auto total_nnz = m->get_num_stored_elements(); diff --git a/cuda/factorization/par_ilut_filter_kernel.cu b/cuda/factorization/par_ilut_filter_kernel.cu index db11cb6feb9..482f8840148 100644 --- a/cuda/factorization/par_ilut_filter_kernel.cu +++ b/cuda/factorization/par_ilut_filter_kernel.cu @@ -81,10 +81,10 @@ namespace { template void threshold_filter(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, + const matrix::Csr* a, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, bool lower) + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool lower) { auto old_row_ptrs = a->get_const_row_ptrs(); auto old_col_idxs = a->get_const_col_idxs(); @@ -110,7 +110,7 @@ void threshold_filter(syn::value_list, builder.get_value_array().resize_and_reset(new_nnz); auto new_col_idxs = m_out->get_col_idxs(); auto new_vals = m_out->get_values(); - IndexType *new_row_idxs{}; + IndexType* new_row_idxs{}; if (m_out_coo) { matrix::CooBuilder coo_builder{m_out_coo}; coo_builder.get_row_idx_array().resize_and_reset(new_nnz); @@ -134,10 +134,10 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_threshold_filter, threshold_filter); template void threshold_filter(std::shared_ptr exec, - const matrix::Csr *a, + const matrix::Csr* a, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, bool lower) + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool lower) { auto num_rows = a->get_size()[0]; auto total_nnz = a->get_num_stored_elements(); diff --git 
a/cuda/factorization/par_ilut_select_common.cu b/cuda/factorization/par_ilut_select_common.cu index 1d2a94883c4..089798f7b65 100644 --- a/cuda/factorization/par_ilut_select_common.cu +++ b/cuda/factorization/par_ilut_select_common.cu @@ -60,9 +60,9 @@ namespace par_ilut_factorization { template void sampleselect_count(std::shared_ptr exec, - const ValueType *values, IndexType size, - remove_complex *tree, unsigned char *oracles, - IndexType *partial_counts, IndexType *total_counts) + const ValueType* values, IndexType size, + remove_complex* tree, unsigned char* oracles, + IndexType* partial_counts, IndexType* total_counts) { constexpr auto bucket_count = kernel::searchtree_width; auto num_threads_total = ceildiv(size, items_per_thread); @@ -85,17 +85,17 @@ void sampleselect_count(std::shared_ptr exec, #define DECLARE_SSSS_COUNT(ValueType, IndexType) \ void sampleselect_count(std::shared_ptr exec, \ - const ValueType *values, IndexType size, \ - remove_complex *tree, \ - unsigned char *oracles, IndexType *partial_counts, \ - IndexType *total_counts) + const ValueType* values, IndexType size, \ + remove_complex* tree, \ + unsigned char* oracles, IndexType* partial_counts, \ + IndexType* total_counts) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_SSSS_COUNT); template sampleselect_bucket sampleselect_find_bucket( - std::shared_ptr exec, IndexType *prefix_sum, + std::shared_ptr exec, IndexType* prefix_sum, IndexType rank) { kernel::find_bucket<<<1, config::warp_size>>>(prefix_sum, rank); @@ -107,7 +107,7 @@ sampleselect_bucket sampleselect_find_bucket( #define DECLARE_SSSS_FIND_BUCKET(IndexType) \ sampleselect_bucket sampleselect_find_bucket( \ - std::shared_ptr exec, IndexType *prefix_sum, \ + std::shared_ptr exec, IndexType* prefix_sum, \ IndexType rank) GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(DECLARE_SSSS_FIND_BUCKET); diff --git a/cuda/factorization/par_ilut_select_common.cuh b/cuda/factorization/par_ilut_select_common.cuh index fe2ddef7401..b3fee23f839 100644 
--- a/cuda/factorization/par_ilut_select_common.cuh +++ b/cuda/factorization/par_ilut_select_common.cuh @@ -51,9 +51,9 @@ constexpr int items_per_thread = 16; template void sampleselect_count(std::shared_ptr exec, - const ValueType *values, IndexType size, - remove_complex *tree, unsigned char *oracles, - IndexType *partial_counts, IndexType *total_counts); + const ValueType* values, IndexType size, + remove_complex* tree, unsigned char* oracles, + IndexType* partial_counts, IndexType* total_counts); template @@ -66,7 +66,7 @@ struct sampleselect_bucket { template sampleselect_bucket sampleselect_find_bucket( - std::shared_ptr exec, IndexType *prefix_sum, + std::shared_ptr exec, IndexType* prefix_sum, IndexType rank); diff --git a/cuda/factorization/par_ilut_select_kernel.cu b/cuda/factorization/par_ilut_select_kernel.cu index a471e55958a..e65015c0082 100644 --- a/cuda/factorization/par_ilut_select_kernel.cu +++ b/cuda/factorization/par_ilut_select_kernel.cu @@ -67,10 +67,10 @@ namespace par_ilut_factorization { template -void sampleselect_filter(const ValueType *values, IndexType size, - const unsigned char *oracles, - const IndexType *partial_counts, IndexType bucket, - remove_complex *out) +void sampleselect_filter(const ValueType* values, IndexType size, + const unsigned char* oracles, + const IndexType* partial_counts, IndexType bucket, + remove_complex* out) { auto num_threads_total = ceildiv(size, items_per_thread); auto num_blocks = @@ -83,10 +83,10 @@ void sampleselect_filter(const ValueType *values, IndexType size, template void threshold_select(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp1, - Array> &tmp2, - remove_complex &threshold) + const matrix::Csr* m, + IndexType rank, Array& tmp1, + Array>& tmp2, + remove_complex& threshold) { auto values = m->get_const_values(); IndexType size = m->get_num_stored_elements(); @@ -110,14 +110,14 @@ void threshold_select(std::shared_ptr exec, tmp1.resize_and_reset(tmp_size); 
tmp2.resize_and_reset(tmp_size_vals); - auto total_counts = reinterpret_cast(tmp1.get_data()); + auto total_counts = reinterpret_cast(tmp1.get_data()); auto partial_counts = - reinterpret_cast(tmp1.get_data() + tmp_size_totals); - auto oracles = reinterpret_cast( + reinterpret_cast(tmp1.get_data() + tmp_size_totals); + auto oracles = reinterpret_cast( tmp1.get_data() + tmp_size_totals + tmp_size_partials); auto tree = - reinterpret_cast(tmp1.get_data() + tmp_size_totals + - tmp_size_partials + tmp_size_oracles); + reinterpret_cast(tmp1.get_data() + tmp_size_totals + + tmp_size_partials + tmp_size_oracles); sampleselect_count(exec, values, size, tree, oracles, partial_counts, total_counts); @@ -140,7 +140,7 @@ void threshold_select(std::shared_ptr exec, int step{}; while (bucket.size > kernel::basecase_size) { std::swap(tmp21, tmp22); - const auto *tmp_in = tmp21; + const auto* tmp_in = tmp21; auto tmp_out = tmp22; sampleselect_count(exec, tmp_in, bucket.size, tree, oracles, @@ -168,7 +168,7 @@ void threshold_select(std::shared_ptr exec, } // base case - auto out_ptr = reinterpret_cast(tmp1.get_data()); + auto out_ptr = reinterpret_cast(tmp1.get_data()); kernel::basecase_select<<<1, kernel::basecase_block_size>>>( tmp22, bucket.size, rank, out_ptr); threshold = exec->copy_val_to_host(out_ptr); diff --git a/cuda/factorization/par_ilut_spgeam_kernel.cu b/cuda/factorization/par_ilut_spgeam_kernel.cu index 074acc92084..f8bbdef2f90 100644 --- a/cuda/factorization/par_ilut_spgeam_kernel.cu +++ b/cuda/factorization/par_ilut_spgeam_kernel.cu @@ -82,12 +82,12 @@ namespace { template void add_candidates(syn::value_list, std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, + const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) { auto num_rows = static_cast(lu->get_size()[0]); auto 
subwarps_per_block = default_block_size / subwarp_size; @@ -148,12 +148,12 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_add_candidates, add_candidates); template void add_candidates(std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, + const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) { auto num_rows = a->get_size()[0]; auto total_nnz = diff --git a/cuda/factorization/par_ilut_sweep_kernel.cu b/cuda/factorization/par_ilut_sweep_kernel.cu index f5674942059..c2f63844afe 100644 --- a/cuda/factorization/par_ilut_sweep_kernel.cu +++ b/cuda/factorization/par_ilut_sweep_kernel.cu @@ -82,12 +82,12 @@ namespace { template void compute_l_u_factors(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo, - matrix::Csr *u, - const matrix::Coo *u_coo, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo, + matrix::Csr* u, + const matrix::Coo* u_coo, + matrix::Csr* u_csc) { auto total_nnz = static_cast(l->get_num_stored_elements() + u->get_num_stored_elements()); @@ -114,12 +114,12 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_l_u_factors, template void compute_l_u_factors(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo, - matrix::Csr *u, - const matrix::Coo *u_coo, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo, + matrix::Csr* u, + const matrix::Coo* u_coo, + matrix::Csr* u_csc) { auto num_rows = a->get_size()[0]; auto total_nnz = diff --git a/cuda/matrix/coo_kernels.cu b/cuda/matrix/coo_kernels.cu index f41a3551296..e215142389c 100644 --- a/cuda/matrix/coo_kernels.cu +++ b/cuda/matrix/coo_kernels.cu @@ -78,8 +78,8 @@ constexpr int spmv_block_size = warps_in_block * config::warp_size; template void 
spmv(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -90,11 +90,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -106,8 +106,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { const auto nnz = a->get_num_stored_elements(); const auto b_ncols = b->get_size()[1]; @@ -142,10 +142,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto nnz = a->get_num_stored_elements(); const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); @@ -183,8 +183,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_row_idxs_to_ptrs(std::shared_ptr exec, - const IndexType *idxs, size_type num_nonzeros, - IndexType *ptrs, size_type length) + const IndexType* idxs, size_type num_nonzeros, + IndexType* ptrs, size_type length) { const auto grid_dim = ceildiv(num_nonzeros, default_block_size); @@ -195,8 +195,8 @@ void convert_row_idxs_to_ptrs(std::shared_ptr exec, template void convert_to_csr(std::shared_ptr exec, - const matrix::Coo 
*source, - matrix::Csr *result) + const matrix::Coo* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -215,8 +215,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Dense *result) + const matrix::Coo* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu index c6b41ebd03b..a087274bc7b 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.cu @@ -105,11 +105,11 @@ namespace host_kernel { template void merge_path_spmv(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Csr* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); const IndexType grid_num = @@ -209,11 +209,11 @@ int compute_items_per_thread(std::shared_ptr exec) template void classical_spmv(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Csr* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { const auto nwarps = exec->get_num_warps_per_sm() * exec->get_num_multiprocessor() * classical_overweight; @@ -251,8 +251,8 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template void spmv(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Csr* a, + const matrix::Dense* b, matrix::Dense* c) { if (a->get_strategy()->get_name() == 
"load_balance") { components::fill_array(exec, c->get_values(), @@ -328,15 +328,13 @@ void spmv(std::shared_ptr exec, #else // CUDA_VERSION >= 11000 cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE; cusparseSpMVAlg_t alg = CUSPARSE_CSRMV_ALG1; - auto row_ptrs = - const_cast(a->get_const_row_ptrs()); - auto col_idxs = - const_cast(a->get_const_col_idxs()); - auto values = const_cast(a->get_const_values()); + auto row_ptrs = const_cast(a->get_const_row_ptrs()); + auto col_idxs = const_cast(a->get_const_col_idxs()); + auto values = const_cast(a->get_const_values()); auto mat = cusparse::create_csr( a->get_size()[0], a->get_size()[1], a->get_num_stored_elements(), row_ptrs, col_idxs, values); - auto b_val = const_cast(b->get_const_values()); + auto b_val = const_cast(b->get_const_values()); auto c_val = c->get_values(); auto vecb = cusparse::create_dnvec(b->get_num_stored_elements(), b_val); @@ -369,11 +367,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { if (a->get_strategy()->get_name() == "load_balance") { dense::scale(exec, beta, c); @@ -419,13 +417,13 @@ void advanced_spmv(std::shared_ptr exec, #else // CUDA_VERSION >= 11000 cusparseOperation_t trans = CUSPARSE_OPERATION_NON_TRANSPOSE; cusparseSpMVAlg_t alg = CUSPARSE_CSRMV_ALG1; - auto row_ptrs = const_cast(a->get_const_row_ptrs()); - auto col_idxs = const_cast(a->get_const_col_idxs()); - auto values = const_cast(a->get_const_values()); + auto row_ptrs = const_cast(a->get_const_row_ptrs()); + auto col_idxs = const_cast(a->get_const_col_idxs()); + auto values = const_cast(a->get_const_values()); auto mat = cusparse::create_csr(a->get_size()[0], a->get_size()[1], 
a->get_num_stored_elements(), row_ptrs, col_idxs, values); - auto b_val = const_cast(b->get_const_values()); + auto b_val = const_cast(b->get_const_values()); auto c_val = c->get_values(); auto vecb = cusparse::create_dnvec(b->get_num_stored_elements(), b_val); @@ -488,9 +486,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgemm(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Csr* a, + const matrix::Csr* b, + matrix::Csr* c) { auto a_nnz = IndexType(a->get_num_stored_elements()); auto a_vals = a->get_const_values(); @@ -508,15 +506,15 @@ void spgemm(std::shared_ptr exec, auto alpha = one(); auto a_nnz = static_cast(a->get_num_stored_elements()); auto b_nnz = static_cast(b->get_num_stored_elements()); - auto null_value = static_cast(nullptr); - auto null_index = static_cast(nullptr); + auto null_value = static_cast(nullptr); + auto null_index = static_cast(nullptr); auto zero_nnz = IndexType{}; auto m = IndexType(a->get_size()[0]); auto n = IndexType(b->get_size()[1]); auto k = IndexType(a->get_size()[1]); matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) auto a_descr = cusparse::create_mat_descr(); @@ -560,14 +558,12 @@ void spgemm(std::shared_ptr exec, #else // CUDA_VERSION >= 11000 const auto beta = zero(); auto spgemm_descr = cusparse::create_spgemm_descr(); - auto a_descr = cusparse::create_csr(m, k, a_nnz, - const_cast(a_row_ptrs), - const_cast(a_col_idxs), - const_cast(a_vals)); - auto b_descr = cusparse::create_csr(k, n, b_nnz, - const_cast(b_row_ptrs), - const_cast(b_col_idxs), - const_cast(b_vals)); + auto a_descr = cusparse::create_csr( + m, k, a_nnz, const_cast(a_row_ptrs), + const_cast(a_col_idxs), const_cast(a_vals)); + auto 
b_descr = cusparse::create_csr( + k, n, b_nnz, const_cast(b_row_ptrs), + const_cast(b_col_idxs), const_cast(b_vals)); auto c_descr = cusparse::create_csr(m, n, zero_nnz, null_index, null_index, null_value); @@ -620,11 +616,11 @@ namespace { template void spgeam(syn::value_list, - std::shared_ptr exec, const ValueType *alpha, - const IndexType *a_row_ptrs, const IndexType *a_col_idxs, - const ValueType *a_vals, const ValueType *beta, - const IndexType *b_row_ptrs, const IndexType *b_col_idxs, - const ValueType *b_vals, matrix::Csr *c) + std::shared_ptr exec, const ValueType* alpha, + const IndexType* a_row_ptrs, const IndexType* a_col_idxs, + const ValueType* a_vals, const ValueType* beta, + const IndexType* b_row_ptrs, const IndexType* b_col_idxs, + const ValueType* b_vals, matrix::Csr* c) { auto m = static_cast(c->get_size()[0]); auto c_row_ptrs = c->get_row_ptrs(); @@ -658,12 +654,12 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Csr *b, - const matrix::Dense *beta, - const matrix::Csr *d, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Csr* b, + const matrix::Dense* beta, + const matrix::Csr* d, + matrix::Csr* c) { if (cusparse::is_supported::value) { auto handle = exec->get_cusparse_handle(); @@ -690,8 +686,8 @@ void advanced_spgemm(std::shared_ptr exec, #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); auto a_descr = cusparse::create_mat_descr(); auto b_descr = cusparse::create_mat_descr(); auto c_descr = cusparse::create_mat_descr(); @@ -730,20 +726,18 @@ void advanced_spgemm(std::shared_ptr exec, cusparse::destroy(b_descr); 
cusparse::destroy(a_descr); #else // CUDA_VERSION >= 11000 - auto null_value = static_cast(nullptr); - auto null_index = static_cast(nullptr); + auto null_value = static_cast(nullptr); + auto null_index = static_cast(nullptr); auto one_val = one(); auto zero_val = zero(); auto zero_nnz = IndexType{}; auto spgemm_descr = cusparse::create_spgemm_descr(); - auto a_descr = cusparse::create_csr(m, k, a_nnz, - const_cast(a_row_ptrs), - const_cast(a_col_idxs), - const_cast(a_vals)); - auto b_descr = cusparse::create_csr(k, n, b_nnz, - const_cast(b_row_ptrs), - const_cast(b_col_idxs), - const_cast(b_vals)); + auto a_descr = cusparse::create_csr( + m, k, a_nnz, const_cast(a_row_ptrs), + const_cast(a_col_idxs), const_cast(a_vals)); + auto b_descr = cusparse::create_csr( + k, n, b_nnz, const_cast(b_row_ptrs), + const_cast(b_col_idxs), const_cast(b_vals)); auto c_descr = cusparse::create_csr(m, n, zero_nnz, null_index, null_index, null_value); @@ -809,11 +803,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *beta, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* beta, + const matrix::Csr* b, + matrix::Csr* c) { auto total_nnz = a->get_num_stored_elements() + b->get_num_stored_elements(); @@ -836,8 +830,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) + const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { const auto grid_dim = ceildiv(num_rows, default_block_size); @@ -848,8 +842,8 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, template void convert_to_coo(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Coo *result) + const matrix::Csr* source, + matrix::Coo* result) { auto num_rows = 
result->get_size()[0]; @@ -865,8 +859,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Dense *result) + const matrix::Csr* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -894,8 +888,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Sellp *result) + const matrix::Csr* source, + matrix::Sellp* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -953,8 +947,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Ell *result) + const matrix::Csr* source, + matrix::Ell* result) { const auto source_values = source->get_const_values(); const auto source_row_ptrs = source->get_const_row_ptrs(); @@ -989,8 +983,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result, size_type stride_factor, + const matrix::Csr* source, + size_type* result, size_type stride_factor, size_type slice_size) { const auto num_rows = source->get_size()[0]; @@ -1039,8 +1033,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { if (cusparse::is_supported::value) { #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) @@ -1087,8 +1081,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { if (cusparse::is_supported::value) { const dim3 block_size(default_block_size, 1, 1); @@ -1144,9 
+1138,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -1169,9 +1163,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -1195,9 +1189,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -1221,8 +1215,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result) + const matrix::Csr* source, + size_type* result) { const auto num_rows = source->get_size()[0]; @@ -1254,8 +1248,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Hybrid *result) + const matrix::Csr* source, + matrix::Hybrid* result) { auto ell_val = result->get_ell_values(); auto ell_col = result->get_ell_col_idxs(); @@ -1296,8 +1290,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Csr *source, - Array *result) + const matrix::Csr* source, + Array* result) { const auto num_rows = 
source->get_size()[0]; auto row_ptrs = source->get_const_row_ptrs(); @@ -1313,7 +1307,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::Csr *to_sort) + matrix::Csr* to_sort) { if (cusparse::is_supported::value) { auto handle = exec->get_cusparse_handle(); @@ -1352,7 +1346,7 @@ void sort_by_column_index(std::shared_ptr exec, #else // CUDA_VERSION >= 11000 auto val_vec = cusparse::create_spvec(nnz, nnz, permutation, vals); auto tmp_vec = - cusparse::create_dnvec(nnz, const_cast(tmp_vals)); + cusparse::create_dnvec(nnz, const_cast(tmp_vals)); cusparse::gather(handle, tmp_vec, val_vec); #endif @@ -1369,7 +1363,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr *to_check, bool *is_sorted) + const matrix::Csr* to_check, bool* is_sorted) { *is_sorted = true; auto cpu_array = Array::view(exec->get_master(), 1, is_sorted); @@ -1389,8 +1383,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Diagonal *diag) + const matrix::Csr* orig, + matrix::Diagonal* diag) { const auto nnz = orig->get_num_stored_elements(); const auto diag_size = diag->get_size()[0]; diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu index 477bdb89e54..7a18bb06e39 100644 --- a/cuda/matrix/dense_kernels.cu +++ b/cuda/matrix/dense_kernels.cu @@ -72,9 +72,9 @@ constexpr int default_block_size = 512; template void simple_apply(std::shared_ptr exec, - const matrix::Dense *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* a, + const matrix::Dense* b, + matrix::Dense* c) { if (cublas::is_supported::value) { auto handle = exec->get_cublas_handle(); @@ -98,9 +98,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense *alpha, - const 
matrix::Dense *a, const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Dense* a, const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* c) { if (cublas::is_supported::value) { cublas::gemm(exec->get_cublas_handle(), CUBLAS_OP_N, CUBLAS_OP_N, @@ -119,9 +119,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { if (cublas::is_supported::value) { // TODO: write a custom kernel which does this more efficiently @@ -162,9 +162,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { if (cublas::is_supported::value) { // TODO: write a custom kernel which does this more efficiently @@ -206,8 +206,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::Dense *x, - matrix::Dense> *result) + const matrix::Dense* x, + matrix::Dense>* result) { if (cublas::is_supported::value) { for (size_type col = 0; col < x->get_size()[1]; ++col) { @@ -246,8 +246,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Coo *result) + const matrix::Dense* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -278,8 +278,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Csr *result) + const matrix::Dense* 
source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -312,8 +312,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Ell *result) + const matrix::Dense* source, + matrix::Ell* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -338,8 +338,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Hybrid *result) + const matrix::Dense* source, + matrix::Hybrid* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -348,8 +348,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Sellp *result) + const matrix::Dense* source, + matrix::Sellp* result) { const auto stride = source->get_stride(); const auto num_rows = result->get_size()[0]; @@ -398,8 +398,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::SparsityCsr *result) + const matrix::Dense* source, + matrix::SparsityCsr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -408,7 +408,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Dense *source, size_type *result) + const matrix::Dense* source, size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ -423,8 +423,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result) + const matrix::Dense* source, + size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = 
Array(exec, num_rows); @@ -458,8 +458,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense *source, - Array *result) + const matrix::Dense* source, + Array* result) { const dim3 block_size(default_block_size, 1, 1); auto rows_per_block = ceildiv(default_block_size, config::warp_size); @@ -479,8 +479,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result, size_type stride_factor, + const matrix::Dense* source, + size_type* result, size_type stride_factor, size_type slice_size) { const auto num_rows = source->get_size()[0]; @@ -530,8 +530,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { if (cublas::is_supported::value) { auto handle = exec->get_cublas_handle(); @@ -539,11 +539,11 @@ void transpose(std::shared_ptr exec, cublas::pointer_mode_guard pm_guard(handle); auto alpha = one(); auto beta = zero(); - cublas::geam( - handle, CUBLAS_OP_T, CUBLAS_OP_N, orig->get_size()[0], - orig->get_size()[1], &alpha, orig->get_const_values(), - orig->get_stride(), &beta, static_cast(nullptr), - trans->get_size()[1], trans->get_values(), trans->get_stride()); + cublas::geam(handle, CUBLAS_OP_T, CUBLAS_OP_N, orig->get_size()[0], + orig->get_size()[1], &alpha, orig->get_const_values(), + orig->get_stride(), &beta, + static_cast(nullptr), trans->get_size()[1], + trans->get_values(), trans->get_stride()); } } else { GKO_NOT_IMPLEMENTED; @@ -555,8 +555,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { if (cublas::is_supported::value) { auto handle = exec->get_cublas_handle(); @@ -564,11 
+564,11 @@ void conj_transpose(std::shared_ptr exec, cublas::pointer_mode_guard pm_guard(handle); auto alpha = one(); auto beta = zero(); - cublas::geam( - handle, CUBLAS_OP_C, CUBLAS_OP_N, orig->get_size()[0], - orig->get_size()[1], &alpha, orig->get_const_values(), - orig->get_stride(), &beta, static_cast(nullptr), - trans->get_size()[1], trans->get_values(), trans->get_stride()); + cublas::geam(handle, CUBLAS_OP_C, CUBLAS_OP_N, orig->get_size()[0], + orig->get_size()[1], &alpha, orig->get_const_values(), + orig->get_stride(), &beta, + static_cast(nullptr), trans->get_size()[1], + trans->get_values(), trans->get_stride()); } } else { GKO_NOT_IMPLEMENTED; diff --git a/cuda/matrix/diagonal_kernels.cu b/cuda/matrix/diagonal_kernels.cu index bea495fdc2f..df1d9eb2c7c 100644 --- a/cuda/matrix/diagonal_kernels.cu +++ b/cuda/matrix/diagonal_kernels.cu @@ -62,9 +62,9 @@ constexpr int default_block_size = 512; template void apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { const auto num_rows = b->get_size()[0]; const auto diag_values = a->get_const_values(); diff --git a/cuda/matrix/ell_kernels.cu b/cuda/matrix/ell_kernels.cu index fe0cb2a2c44..339689fa92c 100644 --- a/cuda/matrix/ell_kernels.cu +++ b/cuda/matrix/ell_kernels.cu @@ -111,7 +111,7 @@ namespace { template GKO_INLINE auto as_cuda_accessor( - const acc::range> &acc) + const acc::range>& acc) { return acc::range< acc::reduced_row_major, cuda_type>>( @@ -124,11 +124,11 @@ GKO_INLINE auto as_cuda_accessor( template void abstract_spmv(syn::value_list, int num_worker_per_row, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { using a_accessor = 
gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; @@ -183,7 +183,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_abstract_spmv, abstract_spmv); template std::array compute_thread_worker_and_atomicity( std::shared_ptr exec, - const matrix::Ell *a) + const matrix::Ell* a) { int num_thread_per_worker = 1; int atomic = 0; @@ -227,9 +227,9 @@ std::array compute_thread_worker_and_atomicity( template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -261,11 +261,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Ell* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -294,8 +294,8 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Dense *result) + const matrix::Ell* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -324,8 +324,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Csr *result) + const matrix::Ell* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -361,8 +361,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Ell *source, - size_type *result) + const matrix::Ell* source, + 
size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ -378,8 +378,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Ell *source, - Array *result) + const matrix::Ell* source, + Array* result) { const auto num_rows = source->get_size()[0]; const auto max_nnz_per_row = source->get_num_stored_elements_per_row(); @@ -400,8 +400,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Ell *orig, - matrix::Diagonal *diag) + const matrix::Ell* orig, + matrix::Diagonal* diag) { const auto max_nnz_per_row = orig->get_num_stored_elements_per_row(); const auto orig_stride = orig->get_stride(); diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.cu index 6f7bc48cf92..2fb777b2cac 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.cu @@ -59,20 +59,20 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* a, + const matrix::Dense* b, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* alpha, + const matrix::Fbcsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -80,14 +80,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) GKO_NOT_IMPLEMENTED; + const IndexType* ptrs, 
size_type num_rows, + IndexType* idxs) GKO_NOT_IMPLEMENTED; template void convert_to_dense(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -95,8 +95,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(const std::shared_ptr exec, - const matrix::Fbcsr *const source, - matrix::Csr *const result) + const matrix::Fbcsr* const source, + matrix::Csr* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -105,8 +105,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* orig, + matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -114,8 +114,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) + const matrix::Fbcsr* orig, + matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -125,8 +125,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + size_type* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -135,8 +135,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - Array *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + Array* result) GKO_NOT_IMPLEMENTED; 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -145,8 +145,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* to_check, + bool* is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); @@ -154,7 +154,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(const std::shared_ptr exec, - matrix::Fbcsr *const to_sort) + matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -163,8 +163,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* orig, + matrix::Diagonal* diag) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/cuda/matrix/hybrid_kernels.cu b/cuda/matrix/hybrid_kernels.cu index b699aa52e32..a8aa6933b20 100644 --- a/cuda/matrix/hybrid_kernels.cu +++ b/cuda/matrix/hybrid_kernels.cu @@ -71,8 +71,8 @@ constexpr int warps_in_block = 4; template void convert_to_dense(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Hybrid* source, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_HYBRID_CONVERT_TO_DENSE_KERNEL); @@ -80,8 +80,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Csr *result) + const matrix::Hybrid* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; auto coo_offset = Array(exec, num_rows + 1); @@ -148,8 +148,8 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Hybrid *source, - size_type *result) + const matrix::Hybrid* source, + size_type* result) { size_type ell_nnz = 0; size_type coo_nnz = 0; diff --git a/cuda/matrix/sellp_kernels.cu b/cuda/matrix/sellp_kernels.cu index a53ae60be82..6e6d5e74ff6 100644 --- a/cuda/matrix/sellp_kernels.cu +++ b/cuda/matrix/sellp_kernels.cu @@ -67,8 +67,8 @@ constexpr int default_block_size = 512; template void spmv(std::shared_ptr exec, - const matrix::Sellp *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Sellp* a, + const matrix::Dense* b, matrix::Dense* c) { const dim3 blockSize(matrix::default_slice_size); const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), @@ -86,11 +86,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Sellp *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Sellp* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { const dim3 blockSize(matrix::default_slice_size); const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), @@ -111,8 +111,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Dense *result) + const matrix::Sellp* source, + matrix::Dense* result) { const auto num_rows = source->get_size()[0]; const auto num_cols = source->get_size()[1]; @@ -155,8 +155,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Csr *result) + const matrix::Sellp* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; const auto slice_size = source->get_slice_size(); @@ -201,8 +201,8 
@@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Sellp *source, - size_type *result) + const matrix::Sellp* source, + size_type* result) { const auto num_rows = source->get_size()[0]; @@ -232,8 +232,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Sellp *orig, - matrix::Diagonal *diag) + const matrix::Sellp* orig, + matrix::Diagonal* diag) { const auto diag_size = diag->get_size()[0]; const auto slice_size = orig->get_slice_size(); diff --git a/cuda/matrix/sparsity_csr_kernels.cu b/cuda/matrix/sparsity_csr_kernels.cu index b312e2a7872..90d2fba8dcb 100644 --- a/cuda/matrix/sparsity_csr_kernels.cu +++ b/cuda/matrix/sparsity_csr_kernels.cu @@ -49,9 +49,9 @@ namespace sparsity_csr { template void spmv(std::shared_ptr exec, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* a, + const matrix::Dense* b, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_SPMV_KERNEL); @@ -59,11 +59,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* alpha, + const matrix::SparsityCsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL); @@ -72,8 +72,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_num_diagonal_elements( std::shared_ptr exec, - const matrix::SparsityCsr *matrix, - size_type *num_diagonal_elements) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* matrix, + size_type* num_diagonal_elements) GKO_NOT_IMPLEMENTED; 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_COUNT_NUM_DIAGONAL_ELEMENTS_KERNEL); @@ -81,9 +81,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void remove_diagonal_elements( - std::shared_ptr exec, const IndexType *row_ptrs, - const IndexType *col_idxs, - matrix::SparsityCsr *matrix) GKO_NOT_IMPLEMENTED; + std::shared_ptr exec, const IndexType* row_ptrs, + const IndexType* col_idxs, + matrix::SparsityCsr* matrix) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_REMOVE_DIAGONAL_ELEMENTS_KERNEL); @@ -91,8 +91,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::SparsityCsr *orig, - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, + matrix::SparsityCsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -101,7 +101,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::SparsityCsr *to_sort) + matrix::SparsityCsr* to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -111,8 +111,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* to_check, + bool* is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/cuda/multigrid/amgx_pgm_kernels.cu b/cuda/multigrid/amgx_pgm_kernels.cu index 8c811a638a5..da7fcd1521e 100644 --- a/cuda/multigrid/amgx_pgm_kernels.cu +++ b/cuda/multigrid/amgx_pgm_kernels.cu @@ -77,8 +77,8 @@ constexpr int default_block_size = 512; template void match_edge(std::shared_ptr exec, - const Array &strongest_neighbor, - Array &agg) + const Array& strongest_neighbor, + Array& agg) { const auto num = agg.get_num_elems(); const dim3 
grid(ceildiv(num, default_block_size)); @@ -91,7 +91,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, IndexType *num_unagg) + const Array& agg, IndexType* num_unagg) { Array active_agg(exec, agg.get_num_elems()); const dim3 grid(ceildiv(active_agg.get_num_elems(), default_block_size)); @@ -106,8 +106,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template -void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) +void renumber(std::shared_ptr exec, Array& agg, + IndexType* num_agg) { const auto num = agg.get_num_elems(); Array agg_map(exec, num + 1); @@ -126,9 +126,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); template void find_strongest_neighbor( std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, Array& agg, + Array& strongest_neighbor) { const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); @@ -143,10 +143,10 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void assign_to_exist_agg(std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, - Array &agg, - Array &intermediate_agg) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, + Array& agg, + Array& intermediate_agg) { const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); diff --git a/cuda/preconditioner/isai_kernels.cu b/cuda/preconditioner/isai_kernels.cu index 87732ea6dad..363a2a1671f 100644 --- a/cuda/preconditioner/isai_kernels.cu +++ b/cuda/preconditioner/isai_kernels.cu @@ -73,9 +73,9 @@ constexpr int default_block_size{subwarps_per_block * subwarp_size}; template void generate_tri_inverse(std::shared_ptr exec, - const matrix::Csr *input, - 
matrix::Csr *inverse, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* input, + matrix::Csr* inverse, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, bool lower) { const auto num_rows = input->get_size()[0]; @@ -111,10 +111,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_general_inverse(std::shared_ptr exec, - const matrix::Csr *input, - matrix::Csr *inverse, - IndexType *excess_rhs_ptrs, - IndexType *excess_nz_ptrs, bool spd) + const matrix::Csr* input, + matrix::Csr* inverse, + IndexType* excess_rhs_ptrs, + IndexType* excess_nz_ptrs, bool spd) { const auto num_rows = input->get_size()[0]; @@ -137,12 +137,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_excess_system(std::shared_ptr exec, - const matrix::Csr *input, - const matrix::Csr *inverse, - const IndexType *excess_rhs_ptrs, - const IndexType *excess_nz_ptrs, - matrix::Csr *excess_system, - matrix::Dense *excess_rhs, + const matrix::Csr* input, + const matrix::Csr* inverse, + const IndexType* excess_rhs_ptrs, + const IndexType* excess_nz_ptrs, + matrix::Csr* excess_system, + matrix::Dense* excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -165,8 +165,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - matrix::Dense *excess_solution, + const IndexType* excess_block_ptrs, + matrix::Dense* excess_solution, size_type e_start, size_type e_end) { const dim3 block(default_block_size, 1, 1); @@ -182,9 +182,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scatter_excess_solution(std::shared_ptr exec, - const IndexType *excess_rhs_ptrs, - const matrix::Dense *excess_solution, - matrix::Csr *inverse, + const IndexType* excess_rhs_ptrs, + const matrix::Dense* excess_solution, + matrix::Csr* inverse, size_type e_start, size_type e_end) { const auto num_rows = 
inverse->get_size()[0]; diff --git a/cuda/preconditioner/jacobi_advanced_apply_kernel.cu b/cuda/preconditioner/jacobi_advanced_apply_kernel.cu index 7ed975868ef..d4c1649d6fd 100644 --- a/cuda/preconditioner/jacobi_advanced_apply_kernel.cu +++ b/cuda/preconditioner/jacobi_advanced_apply_kernel.cu @@ -70,12 +70,12 @@ template void advanced_apply( syn::value_list, size_type num_blocks, - const precision_reduction *block_precisions, - const IndexType *block_pointers, const ValueType *blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const ValueType *alpha, const ValueType *b, size_type b_stride, - ValueType *x, size_type x_stride) + const precision_reduction* block_precisions, + const IndexType* block_pointers, const ValueType* blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const ValueType* alpha, const ValueType* b, size_type b_stride, + ValueType* x, size_type x_stride) { constexpr int subwarp_size = get_larger_power(max_block_size); constexpr int blocks_per_warp = config::warp_size / subwarp_size; @@ -108,14 +108,14 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_advanced_apply, advanced_apply); template void apply(std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, - const Array &blocks, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, + const Array& blocks, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* x) { // TODO: write a special kernel for multiple RHS dense::scale(exec, beta, x); diff --git a/cuda/preconditioner/jacobi_generate_kernel.cu b/cuda/preconditioner/jacobi_generate_kernel.cu index 
b897bdc90d9..218026df629 100644 --- a/cuda/preconditioner/jacobi_generate_kernel.cu +++ b/cuda/preconditioner/jacobi_generate_kernel.cu @@ -72,13 +72,13 @@ namespace { template void generate(syn::value_list, - const matrix::Csr *mtx, - remove_complex accuracy, ValueType *block_data, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - remove_complex *conditioning, - precision_reduction *block_precisions, - const IndexType *block_ptrs, size_type num_blocks) + const matrix::Csr* mtx, + remove_complex accuracy, ValueType* block_data, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + remove_complex* conditioning, + precision_reduction* block_precisions, + const IndexType* block_ptrs, size_type num_blocks) { constexpr int subwarp_size = get_larger_power(max_block_size); constexpr int blocks_per_warp = config::warp_size / subwarp_size; @@ -113,14 +113,14 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_generate, generate); template void generate(std::shared_ptr exec, - const matrix::Csr *system_matrix, + const matrix::Csr* system_matrix, size_type num_blocks, uint32 max_block_size, remove_complex accuracy, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array> &conditioning, - Array &block_precisions, - const Array &block_pointers, Array &blocks) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array>& conditioning, + Array& block_precisions, + const Array& block_pointers, Array& blocks) { components::fill_array(exec, blocks.get_data(), blocks.get_num_elems(), zero()); diff --git a/cuda/preconditioner/jacobi_kernels.cu b/cuda/preconditioner/jacobi_kernels.cu index 77303eb16d7..aa539856de1 100644 --- a/cuda/preconditioner/jacobi_kernels.cu +++ b/cuda/preconditioner/jacobi_kernels.cu @@ -71,9 +71,9 @@ constexpr int default_grid_size = 32 * 32 * 128; template size_type find_natural_blocks(std::shared_ptr exec, - const matrix::Csr *mtx, + const matrix::Csr* mtx, int32 
max_block_size, - IndexType *__restrict__ block_ptrs) + IndexType* __restrict__ block_ptrs) { Array nums(exec, 1); @@ -96,7 +96,7 @@ size_type find_natural_blocks(std::shared_ptr exec, template inline size_type agglomerate_supervariables( std::shared_ptr exec, int32 max_block_size, - size_type num_natural_blocks, IndexType *block_ptrs) + size_type num_natural_blocks, IndexType* block_ptrs) { Array nums(exec, 1); @@ -112,8 +112,8 @@ inline size_type agglomerate_supervariables( void initialize_precisions(std::shared_ptr exec, - const Array &source, - Array &precisions) + const Array& source, + Array& precisions) { const auto block_size = default_num_warps * config::warp_size; const auto grid_size = min( @@ -127,9 +127,9 @@ void initialize_precisions(std::shared_ptr exec, template void find_blocks(std::shared_ptr exec, - const matrix::Csr *system_matrix, - uint32 max_block_size, size_type &num_blocks, - Array &block_pointers) + const matrix::Csr* system_matrix, + uint32 max_block_size, size_type& num_blocks, + Array& block_pointers) { auto num_natural_blocks = find_natural_blocks( exec, system_matrix, max_block_size, block_pointers.get_data()); @@ -148,11 +148,11 @@ template void transpose_jacobi( syn::value_list, size_type num_blocks, - const precision_reduction *block_precisions, - const IndexType *block_pointers, const ValueType *blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *out_blocks) + const precision_reduction* block_precisions, + const IndexType* block_pointers, const ValueType* blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* out_blocks) { constexpr int subwarp_size = get_larger_power(max_block_size); constexpr int blocks_per_warp = config::warp_size / subwarp_size; @@ -183,11 +183,11 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_transpose_jacobi, transpose_jacobi); template void transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 
max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { select_transpose_jacobi( compiled_kernels(), @@ -207,11 +207,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { select_transpose_jacobi( compiled_kernels(), @@ -231,11 +231,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); diff --git a/cuda/preconditioner/jacobi_simple_apply_kernel.cu b/cuda/preconditioner/jacobi_simple_apply_kernel.cu index 216af08cca7..70e33b8caaf 100644 --- a/cuda/preconditioner/jacobi_simple_apply_kernel.cu +++ 
b/cuda/preconditioner/jacobi_simple_apply_kernel.cu @@ -69,11 +69,11 @@ namespace { template void apply(syn::value_list, size_type num_blocks, - const precision_reduction *block_precisions, - const IndexType *block_pointers, const ValueType *blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const ValueType *b, size_type b_stride, ValueType *x, + const precision_reduction* block_precisions, + const IndexType* block_pointers, const ValueType* blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const ValueType* b, size_type b_stride, ValueType* x, size_type x_stride) { constexpr int subwarp_size = get_larger_power(max_block_size); @@ -107,11 +107,11 @@ template void simple_apply( std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const matrix::Dense *b, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const matrix::Dense* b, matrix::Dense* x) { // TODO: write a special kernel for multiple RHS for (size_type col = 0; col < b->get_size()[1]; ++col) { diff --git a/cuda/reorder/rcm_kernels.cu b/cuda/reorder/rcm_kernels.cu index a970b6e86b2..165209f284a 100644 --- a/cuda/reorder/rcm_kernels.cu +++ b/cuda/reorder/rcm_kernels.cu @@ -60,8 +60,8 @@ namespace rcm { template void get_degree_of_nodes(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - IndexType *const degrees) GKO_NOT_IMPLEMENTED; + const IndexType* const row_ptrs, + IndexType* const degrees) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); @@ -69,9 +69,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); template void 
get_permutation( std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, const IndexType *const col_idxs, - const IndexType *const degrees, IndexType *const permutation, - IndexType *const inv_permutation, + const IndexType* const row_ptrs, const IndexType* const col_idxs, + const IndexType* const degrees, IndexType* const permutation, + IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/cuda/solver/cb_gmres_kernels.cu b/cuda/solver/cb_gmres_kernels.cu index f0c4cb61aa6..4ab81512dd8 100644 --- a/cuda/solver/cb_gmres_kernels.cu +++ b/cuda/solver/cb_gmres_kernels.cu @@ -80,7 +80,7 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; // Specialization, so the Accessor can use the same function as regular pointers template GKO_INLINE auto as_cuda_accessor( - const acc::range> &acc) + const acc::range>& acc) { return acc::range< acc::reduced_row_major, cuda_type>>( @@ -91,8 +91,8 @@ GKO_INLINE auto as_cuda_accessor( template GKO_INLINE auto as_cuda_accessor( - const acc::range> - &acc) + const acc::range>& + acc) { return acc::range, cuda_type, mask>>( @@ -105,7 +105,7 @@ GKO_INLINE auto as_cuda_accessor( template -void zero_matrix(size_type m, size_type n, size_type stride, ValueType *array) +void zero_matrix(size_type m, size_type n, size_type stride, ValueType* array) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); @@ -116,11 +116,11 @@ void zero_matrix(size_type m, size_type n, size_type stride, ValueType *array) template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, 
+ Array* stop_status, size_type krylov_dim) { const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), krylov_dim * b->get_size()[1]); @@ -142,13 +142,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense> *arnoldi_norm, + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense>* arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense *next_krylov_basis, - Array *final_iter_nums, size_type krylov_dim) + matrix::Dense* next_krylov_basis, + Array* final_iter_nums, size_type krylov_dim) { constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; @@ -214,14 +214,14 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( template void finish_arnoldi_CGS(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, + matrix::Dense* next_krylov_basis, Accessor3dim krylov_bases, - matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, const stopping_status *stop_status, - stopping_status *reorth_status, - Array *num_reorth) + matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, const stopping_status* stop_status, + stopping_status* reorth_status, + Array* num_reorth) { using non_complex = remove_complex; // optimization parameter @@ -388,12 +388,12 @@ void finish_arnoldi_CGS(std::shared_ptr exec, template void givens_rotation(std::shared_ptr exec, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - size_type iter, const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + matrix::Dense>* 
residual_norm, + matrix::Dense* residual_norm_collection, + size_type iter, const Array* stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -416,17 +416,17 @@ void givens_rotation(std::shared_ptr exec, template void step_1(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, Array *final_iter_nums, - const Array *stop_status, - Array *reorth_status, Array *num_reorth) + matrix::Dense* next_krylov_basis, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, Array* final_iter_nums, + const Array* stop_status, + Array* reorth_status, Array* num_reorth) { increase_final_iteration_numbers_kernel<<< static_cast( @@ -447,9 +447,9 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const Array* final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -470,9 +470,9 @@ void solve_upper_triangular( template void calculate_qy(ConstAccessor3d krylov_bases, size_type num_krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* 
final_iter_nums) { const auto num_rows = before_preconditioner->get_size()[0]; const auto num_cols = before_preconditioner->get_size()[1]; @@ -500,12 +500,12 @@ void calculate_qy(ConstAccessor3d krylov_bases, size_type num_krylov_bases, template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, + const matrix::Dense* residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { // since hessenberg has dims: iters x iters * num_rhs // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh index 53d6661c45f..9173b2155f6 100644 --- a/cuda/solver/common_trs_kernels.cuh +++ b/cuda/solver/common_trs_kernels.cuh @@ -75,7 +75,7 @@ struct SolveStruct : gko::solver::SolveStruct { cusparseSolvePolicy_t policy; cusparseMatDescr_t factor_descr; size_t factor_work_size; - void *factor_work_vec; + void* factor_work_vec; SolveStruct() { factor_work_vec = nullptr; @@ -91,13 +91,13 @@ struct SolveStruct : gko::solver::SolveStruct { policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL; } - SolveStruct(const SolveStruct &) = delete; + SolveStruct(const SolveStruct&) = delete; - SolveStruct(SolveStruct &&) = delete; + SolveStruct(SolveStruct&&) = delete; - SolveStruct &operator=(const SolveStruct &) = delete; + SolveStruct& operator=(const SolveStruct&) = delete; - SolveStruct &operator=(SolveStruct &&) = delete; + SolveStruct& operator=(SolveStruct&&) = delete; ~SolveStruct() { @@ -132,13 +132,13 @@ struct SolveStruct : gko::solver::SolveStruct { cusparseSetMatDiagType(factor_descr, CUSPARSE_DIAG_TYPE_NON_UNIT)); } - SolveStruct(const SolveStruct &) = delete; + SolveStruct(const SolveStruct&) = delete; - SolveStruct(SolveStruct &&) = 
delete; + SolveStruct(SolveStruct&&) = delete; - SolveStruct &operator=(const SolveStruct &) = delete; + SolveStruct& operator=(const SolveStruct&) = delete; - SolveStruct &operator=(SolveStruct &&) = delete; + SolveStruct& operator=(SolveStruct&&) = delete; ~SolveStruct() { @@ -161,7 +161,7 @@ namespace { void should_perform_transpose_kernel(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { #if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020)) @@ -180,7 +180,7 @@ void should_perform_transpose_kernel(std::shared_ptr exec, void init_struct_kernel(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { solve_struct = std::make_shared(); } @@ -188,13 +188,13 @@ void init_struct_kernel(std::shared_ptr exec, template void generate_kernel(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs, bool is_upper) { if (cusparse::is_supported::value) { if (auto cuda_solve_struct = - dynamic_cast(solve_struct)) { + dynamic_cast(solve_struct)) { auto handle = exec->get_cusparse_handle(); if (is_upper) { GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSetMatFillMode( @@ -225,7 +225,7 @@ void generate_kernel(std::shared_ptr exec, exec->free(cuda_solve_struct->factor_work_vec); } cuda_solve_struct->factor_work_vec = - exec->alloc(cuda_solve_struct->factor_work_size); + exec->alloc(cuda_solve_struct->factor_work_size); cusparse::csrsm2_analysis( handle, cuda_solve_struct->algorithm, @@ -268,18 +268,18 @@ void generate_kernel(std::shared_ptr exec, template void solve_kernel(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, - matrix::Dense *trans_x, - const matrix::Dense *b, - matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, + matrix::Dense* trans_x, + const matrix::Dense* b, + 
matrix::Dense* x) { using vec = matrix::Dense; if (cusparse::is_supported::value) { if (auto cuda_solve_struct = - dynamic_cast(solve_struct)) { + dynamic_cast(solve_struct)) { ValueType one = 1.0; auto handle = exec->get_cusparse_handle(); diff --git a/cuda/solver/gmres_kernels.cu b/cuda/solver/gmres_kernels.cu index a401e8fd4d0..c32a9bb5a8b 100644 --- a/cuda/solver/gmres_kernels.cu +++ b/cuda/solver/gmres_kernels.cu @@ -78,11 +78,11 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), krylov_dim * b->get_size()[1]); @@ -104,11 +104,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - Array *final_iter_nums, size_type krylov_dim) + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + Array* final_iter_nums, size_type krylov_dim) { const auto num_rows = residual->get_size()[0]; const auto num_rhs = residual->get_size()[1]; @@ -137,9 +137,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL); template void finish_arnoldi(std::shared_ptr exec, - size_type num_rows, matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) + size_type num_rows, matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + const 
stopping_status* stop_status) { const auto stride_krylov = krylov_bases->get_stride(); const auto stride_hessenberg = hessenberg_iter->get_stride(); @@ -208,12 +208,12 @@ void finish_arnoldi(std::shared_ptr exec, template void givens_rotation(std::shared_ptr exec, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - size_type iter, const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + size_type iter, const Array* stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -236,14 +236,14 @@ void givens_rotation(std::shared_ptr exec, template void step_1(std::shared_ptr exec, size_type num_rows, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - Array *final_iter_nums, - const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + Array* final_iter_nums, + const Array* stop_status) { increase_final_iteration_numbers_kernel<<< static_cast( @@ -262,9 +262,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_1_KERNEL); template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const Array* final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = 
default_block_size; @@ -284,10 +284,10 @@ void solve_upper_triangular( template -void calculate_qy(const matrix::Dense *krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) +void calculate_qy(const matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { const auto num_rows = before_preconditioner->get_size()[0]; const auto num_cols = krylov_bases->get_size()[1]; @@ -317,12 +317,12 @@ void calculate_qy(const matrix::Dense *krylov_bases, template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* krylov_bases, + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, final_iter_nums); diff --git a/cuda/solver/idr_kernels.cu b/cuda/solver/idr_kernels.cu index e05ddfd5317..1deee8d70e3 100644 --- a/cuda/solver/idr_kernels.cu +++ b/cuda/solver/idr_kernels.cu @@ -76,8 +76,8 @@ namespace { template -void initialize_m(const size_type nrhs, matrix::Dense *m, - Array *stop_status) +void initialize_m(const size_type nrhs, matrix::Dense* m, + Array* stop_status) { const auto subspace_dim = m->get_size()[0]; const auto m_stride = m->get_stride(); @@ -90,7 +90,7 @@ void initialize_m(const size_type nrhs, matrix::Dense *m, template -void initialize_subspace_vectors(matrix::Dense *subspace_vectors, +void initialize_subspace_vectors(matrix::Dense* subspace_vectors, bool deterministic) { if (deterministic) { @@ -110,7 +110,7 @@ void initialize_subspace_vectors(matrix::Dense *subspace_vectors, template -void orthonormalize_subspace_vectors(matrix::Dense 
*subspace_vectors) +void orthonormalize_subspace_vectors(matrix::Dense* subspace_vectors) { orthonormalize_subspace_vectors_kernel <<<1, default_block_size>>>( @@ -122,10 +122,10 @@ void orthonormalize_subspace_vectors(matrix::Dense *subspace_vectors) template void solve_lower_triangular(const size_type nrhs, - const matrix::Dense *m, - const matrix::Dense *f, - matrix::Dense *c, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* f, + matrix::Dense* c, + const Array* stop_status) { const auto subspace_dim = m->get_size()[0]; @@ -141,12 +141,12 @@ void solve_lower_triangular(const size_type nrhs, template void update_g_and_u(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense *p, - const matrix::Dense *m, - matrix::Dense *alpha, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, - const Array *stop_status) + const matrix::Dense* p, + const matrix::Dense* m, + matrix::Dense* alpha, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, + const Array* stop_status) { const auto size = g->get_size()[0]; const auto p_stride = p->get_stride(); @@ -190,9 +190,9 @@ void update_g_and_u(std::shared_ptr exec, template void update_m(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - const matrix::Dense *g_k, matrix::Dense *m, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + const matrix::Dense* g_k, matrix::Dense* m, + const Array* stop_status) { const auto size = g_k->get_size()[0]; const auto subspace_dim = m->get_size()[0]; @@ -223,12 +223,12 @@ void update_m(std::shared_ptr exec, const size_type nrhs, template void update_x_r_and_f(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense *m, - const matrix::Dense *g, - const matrix::Dense *u, - matrix::Dense *f, matrix::Dense *r, - matrix::Dense *x, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* g, + const matrix::Dense* u, + 
matrix::Dense* f, matrix::Dense* r, + matrix::Dense* x, + const Array* stop_status) { const auto size = x->get_size()[0]; const auto subspace_dim = m->get_size()[0]; @@ -252,9 +252,9 @@ void update_x_r_and_f(std::shared_ptr exec, template void initialize(std::shared_ptr exec, const size_type nrhs, - matrix::Dense *m, - matrix::Dense *subspace_vectors, bool deterministic, - Array *stop_status) + matrix::Dense* m, + matrix::Dense* subspace_vectors, bool deterministic, + Array* stop_status) { initialize_m(nrhs, m, stop_status); initialize_subspace_vectors(subspace_vectors, deterministic); @@ -266,12 +266,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *m, - const matrix::Dense *f, - const matrix::Dense *residual, - const matrix::Dense *g, matrix::Dense *c, - matrix::Dense *v, - const Array *stop_status) + const size_type k, const matrix::Dense* m, + const matrix::Dense* f, + const matrix::Dense* residual, + const matrix::Dense* g, matrix::Dense* c, + matrix::Dense* v, + const Array* stop_status) { solve_lower_triangular(nrhs, m, f, c, stop_status); @@ -293,10 +293,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *omega, - const matrix::Dense *preconditioned_vector, - const matrix::Dense *c, matrix::Dense *u, - const Array *stop_status) + const size_type k, const matrix::Dense* omega, + const matrix::Dense* preconditioned_vector, + const matrix::Dense* c, matrix::Dense* u, + const Array* stop_status) { const auto num_rows = preconditioned_vector->get_size()[0]; const auto subspace_dim = u->get_size()[1] / nrhs; @@ -317,12 +317,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, 
- matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, matrix::Dense *m, - matrix::Dense *f, matrix::Dense *alpha, - matrix::Dense *residual, matrix::Dense *x, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, matrix::Dense* m, + matrix::Dense* f, matrix::Dense* alpha, + matrix::Dense* residual, matrix::Dense* x, + const Array* stop_status) { update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); update_m(exec, nrhs, k, p, g_k, m, stop_status); @@ -335,9 +335,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense *tht, - const matrix::Dense> *residual_norm, - matrix::Dense *omega, const Array *stop_status) + const remove_complex kappa, const matrix::Dense* tht, + const matrix::Dense>* residual_norm, + matrix::Dense* omega, const Array* stop_status) { const auto grid_dim = ceildiv(nrhs, config::warp_size); compute_omega_kernel<<>>( diff --git a/cuda/solver/lower_trs_kernels.cu b/cuda/solver/lower_trs_kernels.cu index 95da4d221a9..afbd545f1f1 100644 --- a/cuda/solver/lower_trs_kernels.cu +++ b/cuda/solver/lower_trs_kernels.cu @@ -63,14 +63,14 @@ namespace lower_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { should_perform_transpose_kernel(exec, do_transpose); } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { init_struct_kernel(exec, solve_struct); } @@ -78,8 +78,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { generate_kernel(exec, matrix, solve_struct, num_rhs, false); @@ -91,10 +91,10 
@@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { solve_kernel(exec, matrix, solve_struct, trans_b, trans_x, b, x); diff --git a/cuda/solver/upper_trs_kernels.cu b/cuda/solver/upper_trs_kernels.cu index 361a0738d0d..527dc3958a7 100644 --- a/cuda/solver/upper_trs_kernels.cu +++ b/cuda/solver/upper_trs_kernels.cu @@ -63,14 +63,14 @@ namespace upper_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { should_perform_transpose_kernel(exec, do_transpose); } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { init_struct_kernel(exec, solve_struct); } @@ -78,8 +78,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { generate_kernel(exec, matrix, solve_struct, num_rhs, true); @@ -91,10 +91,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { solve_kernel(exec, matrix, solve_struct, trans_b, trans_x, b, x); diff --git a/cuda/stop/criterion_kernels.cu b/cuda/stop/criterion_kernels.cu index 
2bc18045715..3ffb2363ec2 100644 --- a/cuda/stop/criterion_kernels.cu +++ b/cuda/stop/criterion_kernels.cu @@ -59,7 +59,7 @@ constexpr int default_block_size = 512; __global__ __launch_bounds__(default_block_size) void set_all_statuses( size_type num_elems, uint8 stoppingId, bool setFinalized, - stopping_status *stop_status) + stopping_status* stop_status) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_elems) { @@ -70,7 +70,7 @@ __global__ __launch_bounds__(default_block_size) void set_all_statuses( void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, - Array *stop_status) + Array* stop_status) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(stop_status->get_num_elems(), block_size.x), 1, diff --git a/cuda/stop/residual_norm_kernels.cu b/cuda/stop/residual_norm_kernels.cu index 6297df89352..4ce24aa91c8 100644 --- a/cuda/stop/residual_norm_kernels.cu +++ b/cuda/stop/residual_norm_kernels.cu @@ -60,10 +60,10 @@ constexpr int default_block_size = 512; template __global__ __launch_bounds__(default_block_size) void residual_norm_kernel( size_type num_cols, ValueType rel_residual_goal, - const ValueType *__restrict__ tau, const ValueType *__restrict__ orig_tau, + const ValueType* __restrict__ tau, const ValueType* __restrict__ orig_tau, uint8 stoppingId, bool setFinalized, - stopping_status *__restrict__ stop_status, - bool *__restrict__ device_storage) + stopping_status* __restrict__ stop_status, + bool* __restrict__ device_storage) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_cols) { @@ -81,7 +81,7 @@ __global__ __launch_bounds__(default_block_size) void residual_norm_kernel( __global__ __launch_bounds__(1) void init_kernel( - bool *__restrict__ device_storage) + bool* __restrict__ device_storage) { device_storage[0] = true; device_storage[1] = false; @@ -90,12 +90,12 @@ __global__ __launch_bounds__(1) void init_kernel( template void residual_norm(std::shared_ptr exec, - 
const matrix::Dense *tau, - const matrix::Dense *orig_tau, + const matrix::Dense* tau, + const matrix::Dense* orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, - bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, + bool* one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); @@ -138,11 +138,11 @@ template __global__ __launch_bounds__(default_block_size) void implicit_residual_norm_kernel( size_type num_cols, remove_complex rel_residual_goal, - const ValueType *__restrict__ tau, - const remove_complex *__restrict__ orig_tau, + const ValueType* __restrict__ tau, + const remove_complex* __restrict__ orig_tau, uint8 stoppingId, bool setFinalized, - stopping_status *__restrict__ stop_status, - bool *__restrict__ device_storage) + stopping_status* __restrict__ stop_status, + bool* __restrict__ device_storage) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_cols) { @@ -160,7 +160,7 @@ __global__ __global__ __launch_bounds__(1) void init_kernel( - bool *__restrict__ device_storage) + bool* __restrict__ device_storage) { device_storage[0] = true; device_storage[1] = false; @@ -170,11 +170,11 @@ __global__ __launch_bounds__(1) void init_kernel( template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense> *orig_tau, + const matrix::Dense* tau, + const matrix::Dense>* orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, bool* one_changed) { init_kernel<<<1, 1>>>(as_cuda_type(device_storage->get_data())); diff --git a/cuda/test/base/array.cu b/cuda/test/base/array.cu index 4d12cff3988..9f87e7885f0 100644 --- 
a/cuda/test/base/array.cu +++ b/cuda/test/base/array.cu @@ -51,7 +51,7 @@ protected: x.get_data()[1] = 2; } - static void assert_equal_to_original_x(gko::Array &a) + static void assert_equal_to_original_x(gko::Array& a) { ASSERT_EQ(a.get_num_elems(), 2); EXPECT_EQ(a.get_data()[0], T{5}); diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index 032054a1ca8..2cb985903c8 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -52,7 +52,7 @@ namespace { class ExampleOperation : public gko::Operation { public: - explicit ExampleOperation(int &val) : value(val) {} + explicit ExampleOperation(int& val) : value(val) {} void run(std::shared_ptr) const override { @@ -79,7 +79,7 @@ public: cudaGetDevice(&value); } - int &value; + int& value; }; @@ -139,7 +139,7 @@ TEST_F(CudaExecutor, MasterKnowsNumberOfDevices) TEST_F(CudaExecutor, AllocatesAndFreesMemory) { - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_NO_THROW(ptr = cuda->alloc(2)); ASSERT_NO_THROW(cuda->free(ptr)); @@ -149,7 +149,7 @@ TEST_F(CudaExecutor, AllocatesAndFreesMemory) TEST_F(CudaExecutor, FailsWhenOverallocating) { const gko::size_type num_elems = 1ll << 50; // 4PB of integers - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_THROW( { @@ -162,7 +162,7 @@ TEST_F(CudaExecutor, FailsWhenOverallocating) } -__global__ void check_data(int *data) +__global__ void check_data(int* data) { if (data[0] != 3 || data[1] != 8) { asm("trap;"); @@ -173,7 +173,7 @@ __global__ void check_data(int *data) TEST_F(CudaExecutor, CopiesDataToCuda) { int orig[] = {3, 8}; - auto *copy = cuda->alloc(2); + auto* copy = cuda->alloc(2); cuda->copy_from(omp.get(), 2, orig, copy); @@ -183,7 +183,7 @@ TEST_F(CudaExecutor, CopiesDataToCuda) } -__global__ void check_data2(int *data) +__global__ void check_data2(int* data) { if (data[0] != 4 || data[1] != 8) { asm("trap;"); @@ -194,7 +194,7 @@ __global__ void check_data2(int *data) TEST_F(CudaExecutor, CanAllocateOnUnifiedMemory) { 
int orig[] = {3, 8}; - auto *copy = cuda3->alloc(2); + auto* copy = cuda3->alloc(2); cuda3->copy_from(omp.get(), 2, orig, copy); @@ -206,7 +206,7 @@ TEST_F(CudaExecutor, CanAllocateOnUnifiedMemory) } -__global__ void init_data(int *data) +__global__ void init_data(int* data) { data[0] = 3; data[1] = 8; diff --git a/cuda/test/base/kernel_launch.cu b/cuda/test/base/kernel_launch.cu index d983085d974..abd4775290c 100644 --- a/cuda/test/base/kernel_launch.cu +++ b/cuda/test/base/kernel_launch.cu @@ -99,13 +99,13 @@ protected: // nvcc doesn't like device lambdas declared in complex classes, move it out -void run1d(std::shared_ptr exec, size_type dim, int *data) +void run1d(std::shared_ptr exec, size_type dim, int* data) { gko::kernels::cuda::run_kernel( exec, [] GKO_KERNEL(auto i, auto d) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i] = i; }, dim, data); @@ -119,14 +119,14 @@ TEST_F(KernelLaunch, Runs1D) } -void run1d(std::shared_ptr exec, gko::Array &data) +void run1d(std::shared_ptr exec, gko::Array& data) { gko::kernels::cuda::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i] = i; } else { @@ -144,16 +144,16 @@ TEST_F(KernelLaunch, Runs1DArray) } -void run1d(std::shared_ptr exec, gko::matrix::Dense<> *m) +void run1d(std::shared_ptr exec, gko::matrix::Dense<>* m) { gko::kernels::cuda::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); bool pointers_correct = d.data == 
d_ptr && d2.data == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; @@ -168,7 +168,7 @@ void run1d(std::shared_ptr exec, gko::matrix::Dense<> *m) d(i / 4, i % 4) = 0; } }, - 16, m, static_cast *>(m), + 16, m, static_cast*>(m), m->get_const_values()); } @@ -180,14 +180,14 @@ TEST_F(KernelLaunch, Runs1DDense) } -void run2d(std::shared_ptr exec, int *data) +void run2d(std::shared_ptr exec, int* data) { gko::kernels::cuda::run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto d) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i + 4 * j] = 4 * i + j; }, dim<2>{4, 4}, data); @@ -201,15 +201,15 @@ TEST_F(KernelLaunch, Runs2D) } -void run2d(std::shared_ptr exec, gko::Array &data) +void run2d(std::shared_ptr exec, gko::Array& data) { gko::kernels::cuda::run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i + 4 * j] = 4 * i + j; } else { @@ -227,23 +227,23 @@ TEST_F(KernelLaunch, Runs2DArray) } -void run2d(std::shared_ptr exec, gko::matrix::Dense<> *m1, - gko::matrix::Dense<> *m2, gko::matrix::Dense<> *m3) +void run2d(std::shared_ptr exec, gko::matrix::Dense<>* m1, + gko::matrix::Dense<>* m2, gko::matrix::Dense<>* m3) { gko::kernels::cuda::run_kernel_solver( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, auto d4, auto d2_ptr, auto d3_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); - static_assert(is_same::value, 
"type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && d3.data == d2_ptr && d4 == d3_ptr; bool strides_correct = @@ -262,7 +262,7 @@ void run2d(std::shared_ptr exec, gko::matrix::Dense<> *m1, } }, dim<2>{4, 4}, m2->get_stride(), m1, - static_cast *>(m1), m1->get_const_values(), + static_cast*>(m1), m1->get_const_values(), gko::kernels::cuda::default_stride(m2), gko::kernels::cuda::row_vector(m3), m2->get_values(), m3->get_values()); } diff --git a/cuda/test/base/lin_op.cu b/cuda/test/base/lin_op.cu index cc4ca8099b7..102d07dfc95 100644 --- a/cuda/test/base/lin_op.cu +++ b/cuda/test/base/lin_op.cu @@ -56,23 +56,23 @@ public: mutable std::shared_ptr last_beta_access; protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { this->access(); - static_cast(b)->access(); - static_cast(x)->access(); + static_cast(b)->access(); + static_cast(x)->access(); last_b_access = b->get_executor(); last_x_access = x->get_executor(); } - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override { this->access(); - static_cast(alpha)->access(); - static_cast(b)->access(); - static_cast(beta)->access(); - static_cast(x)->access(); + static_cast(alpha)->access(); + static_cast(b)->access(); + static_cast(beta)->access(); + static_cast(x)->access(); last_alpha_access = alpha->get_executor(); last_b_access = b->get_executor(); last_beta_access = beta->get_executor(); diff --git a/cuda/test/base/math.cu 
b/cuda/test/base/math.cu index 19ee73ea3eb..eab728940f8 100644 --- a/cuda/test/base/math.cu +++ b/cuda/test/base/math.cu @@ -95,7 +95,7 @@ __device__ bool test_complex_is_finite_function(FuncType isfin) template -__global__ void test_real_is_finite(bool *result) +__global__ void test_real_is_finite(bool* result) { *result = kernel::test_real_is_finite_function( [](T val) { return gko::is_finite(val); }); @@ -103,7 +103,7 @@ __global__ void test_real_is_finite(bool *result) template -__global__ void test_complex_is_finite(bool *result) +__global__ void test_complex_is_finite(bool* result) { *result = kernel::test_complex_is_finite_function( [](ComplexType val) { return gko::is_finite(val); }); diff --git a/cuda/test/components/cooperative_groups_kernels.cu b/cuda/test/components/cooperative_groups_kernels.cu index 47f1e6446cd..3585c958ccd 100644 --- a/cuda/test/components/cooperative_groups_kernels.cu +++ b/cuda/test/components/cooperative_groups_kernels.cu @@ -95,7 +95,7 @@ protected: constexpr static int subwarp_size = config::warp_size / 4; -__device__ void test_assert(bool *success, bool partial) +__device__ void test_assert(bool* success, bool partial) { if (!partial) { *success = false; @@ -103,7 +103,7 @@ __device__ void test_assert(bool *success, bool partial) } -__global__ void cg_shuffle(bool *s) +__global__ void cg_shuffle(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -116,7 +116,7 @@ __global__ void cg_shuffle(bool *s) TEST_F(CooperativeGroups, Shuffle) { test(cg_shuffle); } -__global__ void cg_all(bool *s) +__global__ void cg_all(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -128,7 +128,7 @@ __global__ void cg_all(bool *s) TEST_F(CooperativeGroups, All) { test(cg_all); } -__global__ void cg_any(bool *s) +__global__ void cg_any(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -140,7 +140,7 @@ __global__ void cg_any(bool *s) TEST_F(CooperativeGroups, Any) { 
test(cg_any); } -__global__ void cg_ballot(bool *s) +__global__ void cg_ballot(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -152,7 +152,7 @@ __global__ void cg_ballot(bool *s) TEST_F(CooperativeGroups, Ballot) { test(cg_ballot); } -__global__ void cg_subwarp_shuffle(bool *s) +__global__ void cg_subwarp_shuffle(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -177,7 +177,7 @@ TEST_F(CooperativeGroups, SubwarpShuffle) { test(cg_subwarp_shuffle); } TEST_F(CooperativeGroups, SubwarpShuffle2) { test_subwarp(cg_subwarp_shuffle); } -__global__ void cg_subwarp_all(bool *s) +__global__ void cg_subwarp_all(bool* s) { auto grp = threadIdx.x / subwarp_size; bool test_grp = grp == 1; @@ -204,7 +204,7 @@ TEST_F(CooperativeGroups, SubwarpAll) { test(cg_subwarp_all); } TEST_F(CooperativeGroups, SubwarpAll2) { test_subwarp(cg_subwarp_all); } -__global__ void cg_subwarp_any(bool *s) +__global__ void cg_subwarp_any(bool* s) { auto grp = threadIdx.x / subwarp_size; bool test_grp = grp == 1; @@ -231,7 +231,7 @@ TEST_F(CooperativeGroups, SubwarpAny) { test(cg_subwarp_any); } TEST_F(CooperativeGroups, SubwarpAny2) { test_subwarp(cg_subwarp_any); } -__global__ void cg_subwarp_ballot(bool *s) +__global__ void cg_subwarp_ballot(bool* s) { auto grp = threadIdx.x / subwarp_size; bool test_grp = grp == 1; diff --git a/cuda/test/components/merging_kernels.cu b/cuda/test/components/merging_kernels.cu index 60e1605da0c..434466d0679 100644 --- a/cuda/test/components/merging_kernels.cu +++ b/cuda/test/components/merging_kernels.cu @@ -149,8 +149,8 @@ protected: }; -__global__ void test_merge_step(const gko::int32 *a, const gko::int32 *b, - gko::int32 *c) +__global__ void test_merge_step(const gko::int32* a, const gko::int32* b, + gko::int32* c) { auto warp = tiled_partition(this_thread_block()); auto i = warp.thread_rank(); @@ -171,8 +171,8 @@ TEST_F(Merging, MergeStep) } -__global__ void test_merge(const gko::int32 *a, const 
gko::int32 *b, int size, - gko::int32 *c) +__global__ void test_merge(const gko::int32* a, const gko::int32* b, int size, + gko::int32* c) { auto warp = tiled_partition(this_thread_block()); group_merge(a, size, b, size, warp, @@ -200,8 +200,8 @@ TEST_F(Merging, FullMerge) } -__global__ void test_sequential_merge(const gko::int32 *a, const gko::int32 *b, - int size, gko::int32 *c) +__global__ void test_sequential_merge(const gko::int32* a, const gko::int32* b, + int size, gko::int32* c) { sequential_merge( a, size, b, size, @@ -226,11 +226,11 @@ TEST_F(Merging, SequentialFullMerge) } -__global__ void test_merge_idxs(const gko::int32 *a, const gko::int32 *b, - int size, gko::int32 *c, gko::int32 *aidxs, - gko::int32 *bidxs, gko::int32 *cidxs, - gko::int32 *refaidxs, gko::int32 *refbidxs, - gko::int32 *refcidxs) +__global__ void test_merge_idxs(const gko::int32* a, const gko::int32* b, + int size, gko::int32* c, gko::int32* aidxs, + gko::int32* bidxs, gko::int32* cidxs, + gko::int32* refaidxs, gko::int32* refbidxs, + gko::int32* refcidxs) { if (threadIdx.x == 0) { sequential_merge(a, size, b, size, diff --git a/cuda/test/components/searching_kernels.cu b/cuda/test/components/searching_kernels.cu index faadf2cf377..182d6a34daf 100644 --- a/cuda/test/components/searching_kernels.cu +++ b/cuda/test/components/searching_kernels.cu @@ -89,7 +89,7 @@ protected: }; -__device__ void test_assert(bool *success, bool predicate) +__device__ void test_assert(bool* success, bool predicate) { if (!predicate) { *success = false; @@ -97,7 +97,7 @@ __device__ void test_assert(bool *success, bool predicate) } -__global__ void test_binary_search(bool *success, int offset, int size) +__global__ void test_binary_search(bool* success, int offset, int size) { // test binary search on [offset, offset + size) // for all possible partition points @@ -126,7 +126,7 @@ TEST_F(Searching, BinaryOffset) } -__global__ void test_empty_binary_search(bool *success, int offset, int) +__global__ void 
test_empty_binary_search(bool* success, int offset, int) { auto result = binary_search(offset, 0, [&](int i) { // don't access out-of-bounds! @@ -147,7 +147,7 @@ TEST_F(Searching, BinaryEmptyOffset) } -__global__ void test_sync_binary_search(bool *success, int, int size) +__global__ void test_sync_binary_search(bool* success, int, int size) { // test binary search on [0, size) // for all possible partition points @@ -171,7 +171,7 @@ TEST_F(Searching, SyncBinary) } -__global__ void test_empty_sync_binary_search(bool *success, int, int) +__global__ void test_empty_sync_binary_search(bool* success, int, int) { auto result = synchronous_binary_search(0, [&](int i) { // don't access out-of-bounds! @@ -187,7 +187,7 @@ TEST_F(Searching, EmptySyncBinary) } -__global__ void test_warp_ary_search(bool *success, int offset, int size) +__global__ void test_warp_ary_search(bool* success, int offset, int size) { // test binary search on [offset, offset + size) // for all possible partition points @@ -215,7 +215,7 @@ TEST_F(Searching, WarpAryOffset) } -__global__ void test_warp_wide_search(bool *success, int offset, int size) +__global__ void test_warp_wide_search(bool* success, int offset, int size) { // test binary search on [offset, offset + size) // for all possible partition points diff --git a/cuda/test/components/sorting_kernels.cu b/cuda/test/components/sorting_kernels.cu index 6bcf98fdeca..4df4626be55 100644 --- a/cuda/test/components/sorting_kernels.cu +++ b/cuda/test/components/sorting_kernels.cu @@ -59,7 +59,7 @@ constexpr int num_local = 4; constexpr auto num_threads = num_elements / num_local; -__global__ void test_sort_shared(gko::int32 *data) +__global__ void test_sort_shared(gko::int32* data) { gko::int32 local[num_local]; __shared__ gko::int32 sh_local[num_elements]; @@ -73,14 +73,14 @@ __global__ void test_sort_shared(gko::int32 *data) } -__global__ void test_sort_warp(gko::int32 *data) +__global__ void test_sort_warp(gko::int32* data) { gko::int32 
local[num_local]; for (int i = 0; i < num_local; ++i) { local[i] = data[threadIdx.x * num_local + i]; } bitonic_sort( - local, static_cast(nullptr)); + local, static_cast(nullptr)); for (int i = 0; i < num_local; ++i) { data[threadIdx.x * num_local + i] = local[i]; } diff --git a/cuda/test/factorization/par_ilu_kernels.cpp b/cuda/test/factorization/par_ilu_kernels.cpp index 3c877f7d38f..adfcd8fb194 100644 --- a/cuda/test/factorization/par_ilu_kernels.cpp +++ b/cuda/test/factorization/par_ilu_kernels.cpp @@ -134,10 +134,10 @@ class ParIlu : public ::testing::Test { return mtx; } - void initialize_row_ptrs(index_type *l_row_ptrs_ref, - index_type *u_row_ptrs_ref, - index_type *l_row_ptrs_cuda, - index_type *u_row_ptrs_cuda) + void initialize_row_ptrs(index_type* l_row_ptrs_ref, + index_type* u_row_ptrs_ref, + index_type* l_row_ptrs_cuda, + index_type* u_row_ptrs_cuda) { gko::kernels::reference::factorization::initialize_row_ptrs_l_u( ref, gko::lend(csr_ref), l_row_ptrs_ref, u_row_ptrs_ref); @@ -145,9 +145,9 @@ class ParIlu : public ::testing::Test { cuda, gko::lend(csr_cuda), l_row_ptrs_cuda, u_row_ptrs_cuda); } - void initialize_lu(std::unique_ptr *l_ref, std::unique_ptr *u_ref, - std::unique_ptr *l_cuda, - std::unique_ptr *u_cuda) + void initialize_lu(std::unique_ptr* l_ref, std::unique_ptr* u_ref, + std::unique_ptr* l_cuda, + std::unique_ptr* u_cuda) { auto num_row_ptrs = csr_ref->get_size()[0] + 1; gko::Array l_row_ptrs_ref{ref, num_row_ptrs}; @@ -185,13 +185,13 @@ class ParIlu : public ::testing::Test { template static std::unique_ptr static_unique_ptr_cast( - std::unique_ptr &&from) + std::unique_ptr&& from) { - return std::unique_ptr{static_cast(from.release())}; + return std::unique_ptr{static_cast(from.release())}; } - void compute_lu(std::unique_ptr *l_ref, std::unique_ptr *u_ref, - std::unique_ptr *l_cuda, std::unique_ptr *u_cuda, + void compute_lu(std::unique_ptr* l_ref, std::unique_ptr* u_ref, + std::unique_ptr* l_cuda, std::unique_ptr* u_cuda, 
gko::size_type iterations = 0) { auto coo_ref = Coo::create(ref); diff --git a/cuda/test/factorization/par_ilut_kernels.cpp b/cuda/test/factorization/par_ilut_kernels.cpp index 73307668f00..7496b893865 100644 --- a/cuda/test/factorization/par_ilut_kernels.cpp +++ b/cuda/test/factorization/par_ilut_kernels.cpp @@ -172,8 +172,8 @@ class ParIlut : public ::testing::Test { } template - void test_select(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, index_type rank, + void test_select(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, index_type rank, value_type tolerance = 0.0) { auto size = index_type(mtx->get_num_stored_elements()); @@ -197,8 +197,8 @@ class ParIlut : public ::testing::Test { template > - void test_filter(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, value_type threshold, + void test_filter(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, value_type threshold, bool lower) { auto res = Mtx::create(ref, mtx_size); @@ -226,8 +226,8 @@ class ParIlut : public ::testing::Test { template > - void test_filter_approx(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, index_type rank, + void test_filter_approx(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, index_type rank, value_type tolerance = 0.0) { auto res = Mtx::create(ref, mtx_size); @@ -333,7 +333,7 @@ TEST_F(ParIlut, KernelThresholdFilterNullptrCooIsEquivalentToRef) { auto res = Csr::create(ref, mtx_size); auto dres = Csr::create(cuda, mtx_size); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::kernels::reference::par_ilut_factorization::threshold_filter( ref, mtx_l.get(), 0.5, res.get(), null_coo, true); @@ -422,7 +422,7 @@ TEST_F(ParIlut, KernelThresholdFilterApproxNullptrCooIsEquivalentToRef) test_filter(mtx_l, dmtx_l, 0.5, true); auto res = Csr::create(ref, mtx_size); auto dres = Csr::create(cuda, mtx_size); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::Array tmp(ref); gko::Array dtmp(cuda); gko::remove_complex 
threshold{}; diff --git a/cuda/test/matrix/dense_kernels.cpp b/cuda/test/matrix/dense_kernels.cpp index 179a914018a..0dfd7e8c60f 100644 --- a/cuda/test/matrix/dense_kernels.cpp +++ b/cuda/test/matrix/dense_kernels.cpp @@ -137,7 +137,7 @@ class Dense : public ::testing::Test { std::shuffle(tmp2.begin(), tmp2.end(), rng); std::vector tmp3(x->get_size()[0] / 10); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); - for (auto &i : tmp3) { + for (auto& i : tmp3) { i = row_dist(rng); } rpermute_idxs = @@ -149,7 +149,7 @@ class Dense : public ::testing::Test { } template - std::unique_ptr convert(InputType &&input) + std::unique_ptr convert(InputType&& input) { auto result = ConvertedType::create(input->get_executor()); input->convert_to(result.get()); @@ -571,8 +571,8 @@ TEST_F(Dense, IsTransposable) auto trans = x->transpose(); auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } @@ -603,8 +603,8 @@ TEST_F(Dense, IsConjugateTransposable) auto trans = c_x->conj_transpose(); auto dtrans = dc_x->conj_transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } diff --git a/cuda/test/matrix/diagonal_kernels.cpp b/cuda/test/matrix/diagonal_kernels.cpp index 193080c1e69..3f4cccdfbd6 100644 --- a/cuda/test/matrix/diagonal_kernels.cpp +++ b/cuda/test/matrix/diagonal_kernels.cpp @@ -238,9 +238,9 @@ TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) set_up_complex_data(); auto trans = cdiag->conj_transpose(); - auto trans_diag = static_cast(trans.get()); + auto trans_diag = static_cast(trans.get()); auto dtrans = dcdiag->conj_transpose(); - auto dtrans_diag = static_cast(dtrans.get()); + auto dtrans_diag = static_cast(dtrans.get()); GKO_ASSERT_MTX_NEAR(trans_diag, dtrans_diag, 0); } diff --git 
a/cuda/test/multigrid/amgx_pgm_kernels.cpp b/cuda/test/multigrid/amgx_pgm_kernels.cpp index 9c55eca00fb..feeee3921ad 100644 --- a/cuda/test/multigrid/amgx_pgm_kernels.cpp +++ b/cuda/test/multigrid/amgx_pgm_kernels.cpp @@ -154,7 +154,7 @@ class AmgxPgm : public ::testing::Test { d_system_mtx = gko::clone(cuda, system_mtx); } - void make_weight(Mtx *mtx) + void make_weight(Mtx* mtx) { gko::test::make_symmetric(mtx); // only works for real value cases. diff --git a/cuda/test/preconditioner/isai_kernels.cpp b/cuda/test/preconditioner/isai_kernels.cpp index 9ff25abf2fe..409a25944c9 100644 --- a/cuda/test/preconditioner/isai_kernels.cpp +++ b/cuda/test/preconditioner/isai_kernels.cpp @@ -74,7 +74,7 @@ class Isai : public ::testing::Test { cuda = gko::CudaExecutor::create(0, ref); } - std::unique_ptr clone_allocations(const Csr *csr_mtx) + std::unique_ptr clone_allocations(const Csr* csr_mtx) { if (csr_mtx->get_executor() != ref) { return {nullptr}; @@ -121,14 +121,14 @@ class Isai : public ::testing::Test { } template - std::unique_ptr read(const char *name) + std::unique_ptr read(const char* name) { std::ifstream mtxstream{std::string{gko::matrices::location_isai_mtxs} + name}; auto result = gko::read(mtxstream, ref); // to avoid removing 0s, the matrices store 12345 instead for (gko::size_type i = 0; i < result->get_num_stored_elements(); ++i) { - auto &val = result->get_values()[i]; + auto& val = result->get_values()[i]; if (val == static_cast(12345.0)) { val = 0; } @@ -136,7 +136,7 @@ class Isai : public ::testing::Test { return std::move(result); } - void ensure_diagonal(Dense *mtx) + void ensure_diagonal(Dense* mtx) { for (int i = 0; i < mtx->get_size()[0]; ++i) { mtx->at(i, i) = gko::one(); diff --git a/cuda/test/solver/cb_gmres_kernels.cpp b/cuda/test/solver/cb_gmres_kernels.cpp index 2bc9b17e6c6..1e45fe7c085 100644 --- a/cuda/test/solver/cb_gmres_kernels.cpp +++ b/cuda/test/solver/cb_gmres_kernels.cpp @@ -95,7 +95,7 @@ class CbGmres : public ::testing::Test { 
Range3dHelper generate_krylov_helper(gko::dim<3> size) { auto helper = Range3dHelper{ref, size}; - auto &bases = helper.get_bases(); + auto& bases = helper.get_bases(); const auto num_rows = size[0] * size[1]; const auto num_cols = size[2]; auto temp_krylov_bases = gko::test::generate_random_matrix( @@ -191,7 +191,7 @@ class CbGmres : public ::testing::Test { void assert_krylov_bases_near() { gko::Array d_to_host{ref}; - auto &krylov_bases = range_helper.get_bases(); + auto& krylov_bases = range_helper.get_bases(); d_to_host = d_range_helper.get_bases(); const auto tolerance = r::value; using std::abs; diff --git a/dev_tools/oneapi/fake_interface/cooperative_groups.cuh b/dev_tools/oneapi/fake_interface/cooperative_groups.cuh index 752197e63c2..b4204d50aae 100644 --- a/dev_tools/oneapi/fake_interface/cooperative_groups.cuh +++ b/dev_tools/oneapi/fake_interface/cooperative_groups.cuh @@ -48,7 +48,7 @@ __device__ __forceinline__ grid_group this_grid_i() { return this_grid(); } __device__ auto this_thread_block_i() { return this_thread_block(); } template -__device__ __forceinline__ auto tiled_partition_i(const Group &g) +__device__ __forceinline__ auto tiled_partition_i(const Group& g) { return ::gko::kernels::cuda::group::tiled_partition(g); } diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp index 06e8d504097..39721bc48ae 100644 --- a/devices/cuda/executor.cpp +++ b/devices/cuda/executor.cpp @@ -48,13 +48,13 @@ std::shared_ptr CudaExecutor::get_master() const noexcept } -bool CudaExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +bool CudaExecutor::verify_memory_to(const CudaExecutor* dest_exec) const { return this->get_device_id() == dest_exec->get_device_id(); } -bool CudaExecutor::verify_memory_to(const HipExecutor *dest_exec) const +bool CudaExecutor::verify_memory_to(const HipExecutor* dest_exec) const { #if GINKGO_HIP_PLATFORM_NVCC return this->get_device_id() == dest_exec->get_device_id(); diff --git a/devices/device.cpp 
b/devices/device.cpp index ca6597d652d..36ae1ef7e53 100644 --- a/devices/device.cpp +++ b/devices/device.cpp @@ -40,28 +40,28 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -std::mutex &nvidia_device::get_mutex(int i) +std::mutex& nvidia_device::get_mutex(int i) { static std::mutex mutex[max_devices]; return mutex[i]; } -int &nvidia_device::get_num_execs(int i) +int& nvidia_device::get_num_execs(int i) { static int num_execs[max_devices]; return num_execs[i]; } -std::mutex &amd_device::get_mutex(int i) +std::mutex& amd_device::get_mutex(int i) { static std::mutex mutex[max_devices]; return mutex[i]; } -int &amd_device::get_num_execs(int i) +int& amd_device::get_num_execs(int i) { static int num_execs[max_devices]; return num_execs[i]; diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp index 53da0db72c2..a0abf893d7d 100644 --- a/devices/hip/executor.cpp +++ b/devices/hip/executor.cpp @@ -45,13 +45,13 @@ std::shared_ptr HipExecutor::get_master() const noexcept } -bool HipExecutor::verify_memory_to(const HipExecutor *dest_exec) const +bool HipExecutor::verify_memory_to(const HipExecutor* dest_exec) const { return this->get_device_id() == dest_exec->get_device_id(); } -bool HipExecutor::verify_memory_to(const CudaExecutor *dest_exec) const +bool HipExecutor::verify_memory_to(const CudaExecutor* dest_exec) const { #if GINKGO_HIP_PLATFORM_NVCC return this->get_device_id() == dest_exec->get_device_id(); diff --git a/devices/machine_topology.cpp b/devices/machine_topology.cpp index 4028ad31dae..de280e79262 100644 --- a/devices/machine_topology.cpp +++ b/devices/machine_topology.cpp @@ -51,14 +51,14 @@ class topo_bitmap { topo_bitmap() : bitmap(hwloc_bitmap_alloc()) {} ~topo_bitmap() { hwloc_bitmap_free(bitmap); } #endif - bitmap_type *get() { return bitmap; } + bitmap_type* get() { return bitmap; } private: - bitmap_type *bitmap; + bitmap_type* bitmap; }; -hwloc_topology *init_topology() +hwloc_topology* 
init_topology() { #if GKO_HAVE_HWLOC hwloc_topology_t tmp; @@ -81,8 +81,8 @@ hwloc_topology *init_topology() } // namespace detail -const MachineTopology::io_obj_info *MachineTopology::get_pci_device( - const std::string &pci_bus_id) const +const MachineTopology::io_obj_info* MachineTopology::get_pci_device( + const std::string& pci_bus_id) const { for (size_type id = 0; id < this->pci_devices_.size(); ++id) { if (this->pci_devices_[id].pci_bus_id.compare(0, 12, pci_bus_id, 0, @@ -124,8 +124,8 @@ MachineTopology::MachineTopology() void MachineTopology::hwloc_binding_helper( - const std::vector &obj, - const std::vector &bind_ids, const bool singlify) const + const std::vector& obj, + const std::vector& bind_ids, const bool singlify) const { #if GKO_HAVE_HWLOC detail::topo_bitmap bitmap_toset; @@ -149,7 +149,7 @@ void MachineTopology::hwloc_binding_helper( void MachineTopology::load_objects( hwloc_obj_type_t type, - std::vector &objects) const + std::vector& objects) const { #if GKO_HAVE_HWLOC // Get the number of normal objects of a certain type (Core, PU, Machine @@ -168,7 +168,7 @@ void MachineTopology::load_objects( inline int MachineTopology::get_obj_id_by_os_index( - const std::vector &objects, + const std::vector& objects, size_type os_index) const { #if GKO_HAVE_HWLOC @@ -183,7 +183,7 @@ inline int MachineTopology::get_obj_id_by_os_index( inline int MachineTopology::get_obj_id_by_gp_index( - const std::vector &objects, + const std::vector& objects, size_type gp_index) const { #if GKO_HAVE_HWLOC @@ -199,7 +199,7 @@ inline int MachineTopology::get_obj_id_by_gp_index( void MachineTopology::load_objects( hwloc_obj_type_t type, - std::vector &vector) const + std::vector& vector) const { #if GKO_HAVE_HWLOC GKO_ASSERT(this->cores_.size() != 0); diff --git a/devices/omp/executor.cpp b/devices/omp/executor.cpp index 3808d42969e..837451b7c84 100644 --- a/devices/omp/executor.cpp +++ b/devices/omp/executor.cpp @@ -44,7 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. namespace gko { -void OmpExecutor::populate_exec_info(const MachineTopology *mach_topo) +void OmpExecutor::populate_exec_info(const MachineTopology* mach_topo) { auto num_cores = (mach_topo->get_num_cores() == 0 ? 1 : mach_topo->get_num_cores()); @@ -55,7 +55,7 @@ void OmpExecutor::populate_exec_info(const MachineTopology *mach_topo) } -void OmpExecutor::raw_free(void *ptr) const noexcept { std::free(ptr); } +void OmpExecutor::raw_free(void* ptr) const noexcept { std::free(ptr); } std::shared_ptr OmpExecutor::get_master() noexcept @@ -70,14 +70,14 @@ std::shared_ptr OmpExecutor::get_master() const noexcept } -void *OmpExecutor::raw_alloc(size_type num_bytes) const +void* OmpExecutor::raw_alloc(size_type num_bytes) const { return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "OMP", num_bytes); } -void OmpExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { std::memcpy(dest_ptr, src_ptr, num_bytes); diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index d0380127732..5c3e35c35bf 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -60,7 +60,7 @@ const std::vector get_devices(std::string device_type) {"host", sycl::info::device_type::host}, {"gpu", sycl::info::device_type::gpu}}; std::for_each(device_type.begin(), device_type.end(), - [](char &c) { c = std::tolower(c); }); + [](char& c) { c = std::tolower(c); }); return sycl::device::get_devices(device_type_map.at(device_type)); } @@ -68,8 +68,8 @@ const std::vector get_devices(std::string device_type) } // namespace detail -void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, + const void* src_ptr, void* 
dest_ptr) const { if (num_bytes > 0) { dest->get_queue()->memcpy(dest_ptr, src_ptr, num_bytes).wait(); @@ -77,7 +77,7 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, } -bool OmpExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const { auto device = detail::get_devices( dest_exec->get_device_type())[dest_exec->get_device_id()]; @@ -93,14 +93,14 @@ std::shared_ptr DpcppExecutor::create( } -void DpcppExecutor::populate_exec_info(const MachineTopology *mach_topo) +void DpcppExecutor::populate_exec_info(const MachineTopology* mach_topo) { // Closest CPUs, NUMA node can be updated when there is a way to identify // the device itself, which is currently not available with DPC++. } -void DpcppExecutor::raw_free(void *ptr) const noexcept +void DpcppExecutor::raw_free(void* ptr) const noexcept { // the free function may syncronize excution or not, which depends on // implementation or backend, so it is not guaranteed. 
@@ -108,7 +108,7 @@ void DpcppExecutor::raw_free(void *ptr) const noexcept try { queue_->wait_and_throw(); sycl::free(ptr, queue_->get_context()); - } catch (cl::sycl::exception &err) { + } catch (cl::sycl::exception& err) { #if GKO_VERBOSE_LEVEL >= 1 // Unfortunately, if memory free fails, there's not much we can do std::cerr << "Unrecoverable Dpcpp error on device " @@ -128,16 +128,16 @@ void DpcppExecutor::raw_free(void *ptr) const noexcept } -void *DpcppExecutor::raw_alloc(size_type num_bytes) const +void* DpcppExecutor::raw_alloc(size_type num_bytes) const { - void *dev_ptr = sycl::malloc_device(num_bytes, *queue_.get()); + void* dev_ptr = sycl::malloc_device(num_bytes, *queue_.get()); GKO_ENSURE_ALLOCATED(dev_ptr, "DPC++", num_bytes); return dev_ptr; } -void DpcppExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { queue_->memcpy(dest_ptr, src_ptr, num_bytes).wait(); @@ -145,8 +145,8 @@ void DpcppExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, } -void DpcppExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { // TODO: later when possible, if we have DPC++ with a CUDA backend // support/compiler, we could maybe support native copies? 
@@ -154,15 +154,15 @@ void DpcppExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, } -void DpcppExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { GKO_NOT_SUPPORTED(dest); } -void DpcppExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void DpcppExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { // If the queue is different and is not cpu/host, the queue can not @@ -187,7 +187,7 @@ void DpcppExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, void DpcppExecutor::synchronize() const { queue_->wait_and_throw(); } -void DpcppExecutor::run(const Operation &op) const +void DpcppExecutor::run(const Operation& op) const { this->template log(this, &op); op.run(std::static_pointer_cast( @@ -202,14 +202,14 @@ int DpcppExecutor::get_num_devices(std::string device_type) } -bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const +bool DpcppExecutor::verify_memory_to(const OmpExecutor* dest_exec) const { auto device = detail::get_devices( get_exec_info().device_type)[get_exec_info().device_id]; return device.is_host() || device.is_cpu(); } -bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const +bool DpcppExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const { // If the queue is different and is not cpu/host, the queue can not access // the data from another queue (on the same device) @@ -227,7 +227,7 @@ bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const namespace detail { -void delete_queue(sycl::queue *queue) +void delete_queue(sycl::queue* queue) { queue->wait(); delete queue; @@ -247,10 +247,10 @@ void 
DpcppExecutor::set_device_property() try { auto subgroup_sizes = device.get_info(); - for (auto &i : subgroup_sizes) { + for (auto& i : subgroup_sizes) { this->get_exec_info().subgroup_sizes.push_back(i); } - } catch (cl::sycl::runtime_error &err) { + } catch (cl::sycl::runtime_error& err) { GKO_NOT_SUPPORTED(device); } } @@ -277,7 +277,7 @@ void DpcppExecutor::set_device_property() // `wait()` would be needed after every call to a DPC++ function or kernel. // For example, without `in_order`, doing a copy, a kernel, and a copy, will // not necessarily happen in that order by default, which we need to avoid. - auto *queue = new sycl::queue{device, sycl::property::queue::in_order{}}; + auto* queue = new sycl::queue{device, sycl::property::queue::in_order{}}; queue_ = std::move(queue_manager{queue, detail::delete_queue}); } diff --git a/dpcpp/base/helper.dp.cpp b/dpcpp/base/helper.dp.cpp index 5e6c1a579f5..0190e0d6d41 100644 --- a/dpcpp/base/helper.dp.cpp +++ b/dpcpp/base/helper.dp.cpp @@ -41,7 +41,7 @@ namespace kernels { namespace dpcpp { -bool validate(sycl::queue *queue, unsigned int workgroup_size, +bool validate(sycl::queue* queue, unsigned int workgroup_size, unsigned int subgroup_size) { auto device = queue->get_device(); @@ -50,7 +50,7 @@ bool validate(sycl::queue *queue, unsigned int workgroup_size, auto max_workgroup_size = device.get_info(); bool allowed = false; - for (auto &i : subgroup_size_list) { + for (auto& i : subgroup_size_list) { allowed |= (i == subgroup_size); } return allowed && (workgroup_size <= max_workgroup_size); diff --git a/dpcpp/base/helper.hpp b/dpcpp/base/helper.hpp index cb98e4c511e..90ec1cc05fe 100644 --- a/dpcpp/base/helper.hpp +++ b/dpcpp/base/helper.hpp @@ -58,10 +58,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #define GKO_ENABLE_DEFAULT_HOST(name_, kernel_) \ template \ - void name_(dim3 grid, dim3 block, gko::size_type, sycl::queue *queue, \ + void name_(dim3 grid, dim3 block, gko::size_type, sycl::queue* queue, \ InferredArgs... args) \ { \ - queue->submit([&](sycl::handler &cgh) { \ + queue->submit([&](sycl::handler& cgh) { \ cgh.parallel_for(sycl_nd_range(grid, block), \ [=](sycl::nd_item<3> item_ct1) { \ kernel_(args..., item_ct1); \ @@ -81,9 +81,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_ENABLE_DEFAULT_HOST_CONFIG(name_, kernel_) \ template \ inline void name_(dim3 grid, dim3 block, gko::size_type, \ - sycl::queue *queue, InferredArgs... args) \ + sycl::queue* queue, InferredArgs... args) \ { \ - queue->submit([&](sycl::handler &cgh) { \ + queue->submit([&](sycl::handler& cgh) { \ cgh.parallel_for(sycl_nd_range(grid, block), \ [=](sycl::nd_item<3> item_ct1) { \ kernel_(args..., item_ct1); \ @@ -106,7 +106,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_ENABLE_DEFAULT_CONFIG_CALL(name_, callable_, list_) \ template \ void name_(std::uint32_t desired_cfg, dim3 grid, dim3 block, \ - gko::size_type dynamic_shared_memory, sycl::queue *queue, \ + gko::size_type dynamic_shared_memory, sycl::queue* queue, \ InferredArgs... args) \ { \ callable_( \ @@ -149,7 +149,7 @@ namespace dpcpp { * * @return the given arguments are valid or not in given queue. 
*/ -bool validate(sycl::queue *queue, unsigned workgroup_size, +bool validate(sycl::queue* queue, unsigned workgroup_size, unsigned subgroup_size); @@ -166,9 +166,9 @@ bool validate(sycl::queue *queue, unsigned workgroup_size, * @return the first valid config */ template -std::uint32_t get_first_cfg(IterArr &arr, Validate verify) +std::uint32_t get_first_cfg(IterArr& arr, Validate verify) { - for (auto &cfg : arr) { + for (auto& cfg : arr) { if (verify(cfg)) { return cfg; } diff --git a/dpcpp/base/kernel_launch.dp.hpp b/dpcpp/base/kernel_launch.dp.hpp index 5e9d505ec52..4fe161ff320 100644 --- a/dpcpp/base/kernel_launch.dp.hpp +++ b/dpcpp/base/kernel_launch.dp.hpp @@ -45,7 +45,7 @@ namespace dpcpp { template -void generic_kernel_1d(sycl::handler &cgh, size_type size, KernelFunction fn, +void generic_kernel_1d(sycl::handler& cgh, size_type size, KernelFunction fn, KernelArgs... args) { cgh.parallel_for(sycl::range<1>{size}, [=](sycl::id<1> idx_id) { @@ -56,7 +56,7 @@ void generic_kernel_1d(sycl::handler &cgh, size_type size, KernelFunction fn, template -void generic_kernel_2d(sycl::handler &cgh, size_type rows, size_type cols, +void generic_kernel_2d(sycl::handler& cgh, size_type rows, size_type cols, KernelFunction fn, KernelArgs... args) { cgh.parallel_for(sycl::range<2>{rows, cols}, [=](sycl::id<2> idx) { @@ -69,18 +69,18 @@ void generic_kernel_2d(sycl::handler &cgh, size_type rows, size_type cols, template void run_kernel(std::shared_ptr exec, KernelFunction fn, - size_type size, KernelArgs &&... args) + size_type size, KernelArgs&&... args) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { generic_kernel_1d(cgh, size, fn, map_to_device(args)...); }); } template void run_kernel(std::shared_ptr exec, KernelFunction fn, - dim<2> size, KernelArgs &&... args) + dim<2> size, KernelArgs&&... 
args) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { generic_kernel_2d(cgh, size[0], size[1], fn, map_to_device(args)...); }); } diff --git a/dpcpp/base/kernel_launch_solver.dp.hpp b/dpcpp/base/kernel_launch_solver.dp.hpp index ea0a9ea7f89..5cec5b55d79 100644 --- a/dpcpp/base/kernel_launch_solver.dp.hpp +++ b/dpcpp/base/kernel_launch_solver.dp.hpp @@ -42,7 +42,7 @@ namespace dpcpp { template -void generic_kernel_2d_solver(sycl::handler &cgh, size_type rows, +void generic_kernel_2d_solver(sycl::handler& cgh, size_type rows, size_type cols, size_type default_stride, KernelFunction fn, KernelArgs... args) { @@ -59,9 +59,9 @@ void generic_kernel_2d_solver(sycl::handler &cgh, size_type rows, template void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, - KernelArgs &&... args) + KernelArgs&&... args) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { kernels::dpcpp::generic_kernel_2d_solver( cgh, size[0], size[1], default_stride, fn, kernels::dpcpp::map_to_device(args)...); diff --git a/dpcpp/base/onemkl_bindings.hpp b/dpcpp/base/onemkl_bindings.hpp index d4503fc8fe0..2674e9932ad 100644 --- a/dpcpp/base/onemkl_bindings.hpp +++ b/dpcpp/base/onemkl_bindings.hpp @@ -72,7 +72,7 @@ namespace detail { template -inline void not_implemented(Args &&...) GKO_NOT_IMPLEMENTED; +inline void not_implemented(Args&&...) 
GKO_NOT_IMPLEMENTED; } // namespace detail @@ -95,9 +95,9 @@ struct is_supported> : std::true_type {}; #define GKO_BIND_DOT(ValueType, Name, Func) \ - inline void Name(::cl::sycl::queue &exec_queue, std::int64_t n, \ - const ValueType *x, std::int64_t incx, \ - const ValueType *y, std::int64_t incy, ValueType *result) \ + inline void Name(::cl::sycl::queue& exec_queue, std::int64_t n, \ + const ValueType* x, std::int64_t incx, \ + const ValueType* y, std::int64_t incy, ValueType* result) \ { \ Func(exec_queue, n, x, incx, y, incy, result); \ } \ diff --git a/dpcpp/components/absolute_array.dp.cpp b/dpcpp/components/absolute_array.dp.cpp index 42c5216f8a7..f6e2794b1ae 100644 --- a/dpcpp/components/absolute_array.dp.cpp +++ b/dpcpp/components/absolute_array.dp.cpp @@ -47,9 +47,9 @@ namespace components { template void inplace_absolute_array(std::shared_ptr exec, - ValueType *data, size_type n) + ValueType* data, size_type n) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { const auto idx = idx_id[0]; data[idx] = abs(data[idx]); @@ -62,10 +62,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); template void outplace_absolute_array(std::shared_ptr exec, - const ValueType *in, size_type n, - remove_complex *out) + const ValueType* in, size_type n, + remove_complex* out) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { const auto idx = idx_id[0]; out[idx] = abs(in[idx]); diff --git a/dpcpp/components/atomic.dp.hpp b/dpcpp/components/atomic.dp.hpp index c2048b1510b..a5592e0581b 100644 --- a/dpcpp/components/atomic.dp.hpp +++ b/dpcpp/components/atomic.dp.hpp @@ -73,7 +73,7 @@ T atomic_compare_exchange_strong( template T atomic_compare_exchange_strong( - T *addr, T expected, T desired, + T* addr, T 
expected, T desired, cl::sycl::memory_order success = cl::sycl::memory_order::relaxed, cl::sycl::memory_order fail = cl::sycl::memory_order::relaxed) { @@ -86,7 +86,7 @@ T atomic_compare_exchange_strong( template inline T atomic_fetch_add( - T *addr, T operand, + T* addr, T operand, cl::sycl::memory_order memoryOrder = cl::sycl::memory_order::relaxed) { cl::sycl::atomic obj( @@ -98,7 +98,7 @@ inline T atomic_fetch_add( template inline T atomic_fetch_max( - T *addr, T operand, + T* addr, T operand, cl::sycl::memory_order memoryOrder = cl::sycl::memory_order::relaxed) { cl::sycl::atomic obj( @@ -116,7 +116,7 @@ namespace detail { template struct atomic_helper { - __dpct_inline__ static ValueType atomic_add(ValueType *, ValueType) + __dpct_inline__ static ValueType atomic_add(ValueType*, ValueType) { static_assert(sizeof(ValueType) == 0, "This default function is not implemented, only the " @@ -129,7 +129,7 @@ struct atomic_helper { template struct atomic_max_helper { - __dpct_inline__ static ValueType atomic_max(ValueType *, ValueType) + __dpct_inline__ static ValueType atomic_max(ValueType*, ValueType) { static_assert(sizeof(ValueType) == 0, "This default function is not implemented, only the " @@ -145,7 +145,7 @@ __dpct_inline__ ResultType reinterpret(ValueType val) static_assert(sizeof(ValueType) == sizeof(ResultType), "The type to reinterpret to must be of the same size as the " "original type."); - return reinterpret_cast(val); + return reinterpret_cast(val); } @@ -156,10 +156,10 @@ __dpct_inline__ ResultType reinterpret(ValueType val) addressSpace, ValueType, \ std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ __dpct_inline__ static ValueType atomic_add( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ - CONVERTER_TYPE *address_as_converter = \ - reinterpret_cast(addr); \ + CONVERTER_TYPE* address_as_converter = \ + reinterpret_cast(addr); \ CONVERTER_TYPE old = *address_as_converter; \ 
CONVERTER_TYPE assumed; \ do { \ @@ -185,7 +185,7 @@ GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned int); template \ struct atomic_helper> { \ __dpct_inline__ static ValueType atomic_add( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ return atomic_fetch_add(addr, val); \ } \ @@ -202,11 +202,11 @@ template struct atomic_helper< addressSpace, ValueType, std::enable_if_t() && sizeof(ValueType) >= 16>> { - __dpct_inline__ static ValueType atomic_add(ValueType *__restrict__ addr, + __dpct_inline__ static ValueType atomic_add(ValueType* __restrict__ addr, ValueType val) { using real_type = remove_complex; - real_type *real_addr = reinterpret_cast(addr); + real_type* real_addr = reinterpret_cast(addr); // Separate to real part and imag part auto real = atomic_helper::atomic_add( &real_addr[0], val.real()); @@ -224,10 +224,10 @@ struct atomic_helper< addressSpace, ValueType, \ std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ __dpct_inline__ static ValueType atomic_max( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ - CONVERTER_TYPE *address_as_converter = \ - reinterpret_cast(addr); \ + CONVERTER_TYPE* address_as_converter = \ + reinterpret_cast(addr); \ CONVERTER_TYPE old = *address_as_converter; \ CONVERTER_TYPE assumed; \ do { \ @@ -255,7 +255,7 @@ GKO_BIND_ATOMIC_MAX_STRUCTURE(unsigned int); struct atomic_max_helper> { \ __dpct_inline__ static ValueType atomic_max( \ - ValueType *__restrict__ addr, ValueType val) \ + ValueType* __restrict__ addr, ValueType val) \ { \ return atomic_fetch_max(addr, val); \ } \ @@ -273,7 +273,7 @@ GKO_BIND_ATOMIC_MAX_VALUETYPE(unsigned long long int); template -__dpct_inline__ T atomic_add(T *__restrict__ addr, T val) +__dpct_inline__ T atomic_add(T* __restrict__ addr, T val) { return detail::atomic_helper::atomic_add(addr, val); } @@ -281,7 +281,7 @@ __dpct_inline__ T atomic_add(T *__restrict__ addr, T val) 
template -__dpct_inline__ T atomic_max(T *__restrict__ addr, T val) +__dpct_inline__ T atomic_max(T* __restrict__ addr, T val) { return detail::atomic_max_helper::atomic_max(addr, val); } diff --git a/dpcpp/components/cooperative_groups.dp.hpp b/dpcpp/components/cooperative_groups.dp.hpp index 78f9d63d698..f1b2de71441 100644 --- a/dpcpp/components/cooperative_groups.dp.hpp +++ b/dpcpp/components/cooperative_groups.dp.hpp @@ -174,7 +174,7 @@ class thread_block_tile : public sycl::ONEAPI::sub_group { // note: intel calls nd_item.get_sub_group(), but it still call // intel::sub_group() to create the sub_group. template - explicit thread_block_tile(const Group &parent_group) + explicit thread_block_tile(const Group& parent_group) : data_{Size, 0}, sub_group() { #ifndef NDEBUG @@ -276,7 +276,7 @@ class thread_block_tile<1> { public: template - explicit thread_block_tile(const Group &parent_group) : data_{Size, 0} + explicit thread_block_tile(const Group& parent_group) : data_{Size, 0} {} @@ -354,7 +354,7 @@ __dpct_inline__ std::enable_if_t<(Size > 1) && Size <= 64 && !(Size & (Size - 1)), detail::thread_block_tile> tiled_partition - [[intel::reqd_sub_group_size(Size)]] (const Group &group) + [[intel::reqd_sub_group_size(Size)]] (const Group& group) { return detail::thread_block_tile(group); } @@ -362,7 +362,7 @@ __dpct_inline__ template __dpct_inline__ std::enable_if_t> -tiled_partition(const Group &group) +tiled_partition(const Group& group) { return detail::thread_block_tile(group); } @@ -388,7 +388,7 @@ struct is_communicator_group_impl> : std::true_type {}; class thread_block { - friend __dpct_inline__ thread_block this_thread_block(sycl::nd_item<3> &); + friend __dpct_inline__ thread_block this_thread_block(sycl::nd_item<3>&); public: __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } @@ -398,7 +398,7 @@ class thread_block { __dpct_inline__ void sync() const noexcept { group_.barrier(); } private: - __dpct_inline__ 
thread_block(sycl::nd_item<3> &group) + __dpct_inline__ thread_block(sycl::nd_item<3>& group) : group_{group}, data_{static_cast(group.get_local_range().size()), static_cast(group.get_local_linear_id())} @@ -408,11 +408,11 @@ class thread_block { unsigned rank; } data_; - sycl::nd_item<3> &group_; + sycl::nd_item<3>& group_; }; -__dpct_inline__ thread_block this_thread_block(sycl::nd_item<3> &group) +__dpct_inline__ thread_block this_thread_block(sycl::nd_item<3>& group) { return thread_block(group); } @@ -442,7 +442,7 @@ struct is_synchronizable_group_impl : std::true_type {}; * bit block) would have to be used to represent the full space of thread ranks. */ class grid_group { - friend __dpct_inline__ grid_group this_grid(sycl::nd_item<3> &); + friend __dpct_inline__ grid_group this_grid(sycl::nd_item<3>&); public: __dpct_inline__ unsigned size() const noexcept { return data_.size; } @@ -450,7 +450,7 @@ class grid_group { __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } private: - __dpct_inline__ grid_group(sycl::nd_item<3> &group) + __dpct_inline__ grid_group(sycl::nd_item<3>& group) : data_{static_cast(group.get_global_range().size()), static_cast(group.get_global_linear_id())} {} @@ -465,7 +465,7 @@ class grid_group { // grid_group this_grid() // using cooperative_groups::this_grid; // Instead, use our limited implementation: -__dpct_inline__ grid_group this_grid(sycl::nd_item<3> &group) +__dpct_inline__ grid_group this_grid(sycl::nd_item<3>& group) { return grid_group(group); } diff --git a/dpcpp/components/fill_array.dp.cpp b/dpcpp/components/fill_array.dp.cpp index 9edffc8eaf6..39d8961862b 100644 --- a/dpcpp/components/fill_array.dp.cpp +++ b/dpcpp/components/fill_array.dp.cpp @@ -43,10 +43,10 @@ namespace components { template -void fill_array(std::shared_ptr exec, ValueType *array, +void fill_array(std::shared_ptr exec, ValueType* array, size_type n, ValueType val) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + 
exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { const auto idx = idx_id[0]; array[idx] = val; @@ -59,9 +59,9 @@ GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); template void fill_seq_array(std::shared_ptr exec, - ValueType *array, size_type n) + ValueType* array, size_type n) { - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{n}, [=](sycl::id<1> idx_id) { const auto idx = idx_id[0]; array[idx] = idx; diff --git a/dpcpp/components/format_conversion.dp.hpp b/dpcpp/components/format_conversion.dp.hpp index 99df6f02a4a..bce29ab6210 100644 --- a/dpcpp/components/format_conversion.dp.hpp +++ b/dpcpp/components/format_conversion.dp.hpp @@ -68,9 +68,9 @@ namespace kernel { */ template void count_nnz_per_row(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, + sycl::queue* queue, size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *values, IndexType *result); + const ValueType* values, IndexType* result); } // namespace kernel @@ -89,8 +89,8 @@ namespace kernel { template void convert_row_idxs_to_ptrs(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const IndexType *idxs, - size_type num_nonzeros, IndexType *ptrs, + sycl::queue* queue, const IndexType* idxs, + size_type num_nonzeros, IndexType* ptrs, size_type length); diff --git a/dpcpp/components/matrix_operations.dp.hpp b/dpcpp/components/matrix_operations.dp.hpp index 0768242ce90..fb3496c95c7 100644 --- a/dpcpp/components/matrix_operations.dp.hpp +++ b/dpcpp/components/matrix_operations.dp.hpp @@ -49,7 +49,7 @@ namespace dpcpp { */ template remove_complex compute_inf_norm( - size_type num_rows, size_type num_cols, const ValueType *matrix, + size_type num_rows, size_type num_cols, const ValueType* matrix, size_type stride) 
GKO_NOT_IMPLEMENTED; diff --git a/dpcpp/components/prefix_sum.dp.cpp b/dpcpp/components/prefix_sum.dp.cpp index 63f33e9ba35..c733ec42188 100644 --- a/dpcpp/components/prefix_sum.dp.cpp +++ b/dpcpp/components/prefix_sum.dp.cpp @@ -67,7 +67,7 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_prefix_sum_call, finalize_prefix_sum, template -void prefix_sum(std::shared_ptr exec, IndexType *counts, +void prefix_sum(std::shared_ptr exec, IndexType* counts, size_type num_entries) { // prefix_sum should only be performed on a valid array diff --git a/dpcpp/components/prefix_sum.dp.hpp b/dpcpp/components/prefix_sum.dp.hpp index 3b5e1c317dd..ae995c8be48 100644 --- a/dpcpp/components/prefix_sum.dp.hpp +++ b/dpcpp/components/prefix_sum.dp.hpp @@ -75,8 +75,8 @@ namespace dpcpp { */ template __dpct_inline__ void subwarp_prefix_sum(ValueType element, - ValueType &prefix_sum, - ValueType &total_sum, Group subgroup) + ValueType& prefix_sum, + ValueType& total_sum, Group subgroup) { prefix_sum = inclusive ? element : zero(); total_sum = element; @@ -106,7 +106,7 @@ __dpct_inline__ void subwarp_prefix_sum(ValueType element, */ template __dpct_inline__ void subwarp_prefix_sum(ValueType element, - ValueType &prefix_sum, Group subgroup) + ValueType& prefix_sum, Group subgroup) { ValueType tmp{}; subwarp_prefix_sum(element, prefix_sum, tmp, subgroup); @@ -130,10 +130,10 @@ __dpct_inline__ void subwarp_prefix_sum(ValueType element, * `block_size`, `finalize_prefix_sum` has to be used as well. 
*/ template -void start_prefix_sum(size_type num_elements, ValueType *__restrict__ elements, - ValueType *__restrict__ block_sum, +void start_prefix_sum(size_type num_elements, ValueType* __restrict__ elements, + ValueType* __restrict__ block_sum, sycl::nd_item<3> item_ct1, - UninitializedArray &prefix_helper) + UninitializedArray& prefix_helper) { const auto tidx = thread::get_thread_id_flat(item_ct1); const auto element_id = item_ct1.get_local_id(2); @@ -184,10 +184,10 @@ void start_prefix_sum(size_type num_elements, ValueType *__restrict__ elements, template void start_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_elements, - ValueType *elements, ValueType *block_sum) + sycl::queue* queue, size_type num_elements, + ValueType* elements, ValueType* block_sum) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access::mode::read_write, sycl::access::target::local> @@ -219,8 +219,8 @@ void start_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, */ template void finalize_prefix_sum(size_type num_elements, - ValueType *__restrict__ elements, - const ValueType *__restrict__ block_sum, + ValueType* __restrict__ elements, + const ValueType* __restrict__ block_sum, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -236,10 +236,10 @@ void finalize_prefix_sum(size_type num_elements, template void finalize_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_elements, - ValueType *elements, const ValueType *block_sum) + sycl::queue* queue, size_type num_elements, + ValueType* elements, const ValueType* block_sum) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { finalize_prefix_sum( diff --git a/dpcpp/components/reduction.dp.hpp 
b/dpcpp/components/reduction.dp.hpp index e7f7c8f5ab6..e5e4bf26de0 100644 --- a/dpcpp/components/reduction.dp.hpp +++ b/dpcpp/components/reduction.dp.hpp @@ -86,7 +86,7 @@ constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); template < typename Group, typename ValueType, typename Operator, typename = std::enable_if_t::value>> -__dpct_inline__ ValueType reduce(const Group &group, ValueType local_data, +__dpct_inline__ ValueType reduce(const Group& group, ValueType local_data, Operator reduce_op = Operator{}) { #pragma unroll @@ -109,7 +109,7 @@ __dpct_inline__ ValueType reduce(const Group &group, ValueType local_data, template < typename Group, typename ValueType, typename = std::enable_if_t::value>> -__dpct_inline__ int choose_pivot(const Group &group, ValueType local_data, +__dpct_inline__ int choose_pivot(const Group& group, ValueType local_data, bool is_pivoted) { using real = remove_complex; @@ -142,7 +142,7 @@ template < unsigned int sg_size = config::warp_size, typename Group, typename ValueType, typename Operator, typename = std::enable_if_t::value>> -void reduce(const Group &__restrict__ group, ValueType *__restrict__ data, +void reduce(const Group& __restrict__ group, ValueType* __restrict__ data, Operator reduce_op = Operator{}) { const auto local_id = group.thread_rank(); @@ -176,8 +176,8 @@ void reduce(const Group &__restrict__ group, ValueType *__restrict__ data, */ template -void reduce_array(size_type size, const ValueType *__restrict__ source, - ValueType *__restrict__ result, sycl::nd_item<3> item_ct1, +void reduce_array(size_type size, const ValueType* __restrict__ source, + ValueType* __restrict__ result, sycl::nd_item<3> item_ct1, Operator reduce_op = Operator{}) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -204,13 +204,13 @@ void reduce_array(size_type size, const ValueType *__restrict__ source, */ template void reduce_add_array( - size_type size, const ValueType *__restrict__ source, - ValueType *__restrict__ result, 
sycl::nd_item<3> item_ct1, - UninitializedArray(cfg)> &block_sum) + size_type size, const ValueType* __restrict__ source, + ValueType* __restrict__ result, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)>& block_sum) { reduce_array(cfg)>( - size, source, static_cast(block_sum), item_ct1, - [](const ValueType &x, const ValueType &y) { return x + y; }); + size, source, static_cast(block_sum), item_ct1, + [](const ValueType& x, const ValueType& y) { return x + y; }); if (item_ct1.get_local_id(2) == 0) { result[item_ct1.get_group(2)] = block_sum[0]; @@ -219,10 +219,10 @@ void reduce_add_array( template void reduce_add_array(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type size, - const ValueType *source, ValueType *result) + sycl::queue* queue, size_type size, + const ValueType* source, ValueType* result) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor(cfg)>, 0, sycl::access::mode::read_write, sycl::access::target::local> @@ -254,7 +254,7 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_add_array_call, reduce_add_array_config, */ template ValueType reduce_add_array(std::shared_ptr exec, - size_type size, const ValueType *source) + size_type size, const ValueType* source) { auto block_results_val = source; size_type grid_dim = size; diff --git a/dpcpp/components/segment_scan.dp.hpp b/dpcpp/components/segment_scan.dp.hpp index 09dc3e4e20f..004f71b9ec5 100644 --- a/dpcpp/components/segment_scan.dp.hpp +++ b/dpcpp/components/segment_scan.dp.hpp @@ -56,8 +56,8 @@ namespace dpcpp { */ template __dpct_inline__ bool segment_scan( - const group::thread_block_tile &group, const IndexType ind, - ValueType *__restrict__ val) + const group::thread_block_tile& group, const IndexType ind, + ValueType* __restrict__ val) { bool head = true; #pragma unroll diff --git a/dpcpp/components/uninitialized_array.hpp b/dpcpp/components/uninitialized_array.hpp index eb8a36770d7..5f345d0ac3f 100644 --- 
a/dpcpp/components/uninitialized_array.hpp +++ b/dpcpp/components/uninitialized_array.hpp @@ -66,7 +66,7 @@ class UninitializedArray { * * @return the constexpr pointer to the first entry of the array. */ - constexpr __dpct_inline__ operator const ValueType *() const noexcept + constexpr __dpct_inline__ operator const ValueType*() const noexcept { return &(*this)[0]; } @@ -77,7 +77,7 @@ class UninitializedArray { * * @return the non-const pointer to the first entry of the array. */ - __dpct_inline__ operator ValueType *() noexcept { return &(*this)[0]; } + __dpct_inline__ operator ValueType*() noexcept { return &(*this)[0]; } /** * constexpr array access operator. @@ -87,7 +87,7 @@ class UninitializedArray { * * @return a reference to the array entry at the given index. */ - constexpr __dpct_inline__ const ValueType &operator[](size_type pos) const + constexpr __dpct_inline__ const ValueType& operator[](size_type pos) const noexcept { return data_[pos]; @@ -101,7 +101,7 @@ class UninitializedArray { * * @return a reference to the array entry at the given index. 
*/ - __dpct_inline__ ValueType &operator[](size_type pos) noexcept + __dpct_inline__ ValueType& operator[](size_type pos) noexcept { return data_[pos]; } diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp index ebb45716c45..81ddbbc2277 100644 --- a/dpcpp/factorization/factorization_kernels.dp.cpp +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -58,21 +58,21 @@ namespace factorization { template void find_missing_diagonal_elements( - const matrix::Csr *mtx, - IndexType *elements_to_add_per_row, - bool *changes_required) GKO_NOT_IMPLEMENTED; + const matrix::Csr* mtx, + IndexType* elements_to_add_per_row, + bool* changes_required) GKO_NOT_IMPLEMENTED; template void add_missing_diagonal_elements( - const matrix::Csr *mtx, ValueType *new_values, - IndexType *new_col_idxs, - const IndexType *row_ptrs_addition) GKO_NOT_IMPLEMENTED; + const matrix::Csr* mtx, ValueType* new_values, + IndexType* new_col_idxs, + const IndexType* row_ptrs_addition) GKO_NOT_IMPLEMENTED; template void add_diagonal_elements(std::shared_ptr exec, - matrix::Csr *mtx, + matrix::Csr* mtx, bool is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -82,8 +82,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l_u( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs, IndexType *u_row_ptrs) GKO_NOT_IMPLEMENTED; + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs, IndexType* u_row_ptrs) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_U_KERNEL); @@ -91,9 +91,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l_u(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, - matrix::Csr *csr_u) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, + matrix::Csr* csr_u) GKO_NOT_IMPLEMENTED; 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -103,8 +103,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs) GKO_NOT_IMPLEMENTED; + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FACTORIZATION_INITIALIZE_ROW_PTRS_L_KERNEL); @@ -112,8 +112,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, bool diag_sqrt) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/dpcpp/factorization/ic_kernels.dp.cpp b/dpcpp/factorization/ic_kernels.dp.cpp index 4ea40141287..2af760daa52 100644 --- a/dpcpp/factorization/ic_kernels.dp.cpp +++ b/dpcpp/factorization/ic_kernels.dp.cpp @@ -46,7 +46,7 @@ namespace ic_factorization { template void compute(std::shared_ptr exec, - matrix::Csr *m) GKO_NOT_IMPLEMENTED; + matrix::Csr* m) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); diff --git a/dpcpp/factorization/ilu_kernels.dp.cpp b/dpcpp/factorization/ilu_kernels.dp.cpp index 53a4ee2c636..2901238682a 100644 --- a/dpcpp/factorization/ilu_kernels.dp.cpp +++ b/dpcpp/factorization/ilu_kernels.dp.cpp @@ -46,7 +46,7 @@ namespace ilu_factorization { template void compute_lu(std::shared_ptr exec, - matrix::Csr *m) GKO_NOT_IMPLEMENTED; + matrix::Csr* m) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); diff --git a/dpcpp/factorization/par_ic_kernels.dp.cpp b/dpcpp/factorization/par_ic_kernels.dp.cpp index 9a1279927a5..8ea214800e0 100644 --- a/dpcpp/factorization/par_ic_kernels.dp.cpp +++ b/dpcpp/factorization/par_ic_kernels.dp.cpp @@ -53,7 +53,7 @@ namespace par_ic_factorization { template void 
init_factor(std::shared_ptr exec, - matrix::Csr *l) GKO_NOT_IMPLEMENTED; + matrix::Csr* l) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PAR_IC_INIT_FACTOR_KERNEL); @@ -62,8 +62,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, size_type iterations, - const matrix::Coo *a_lower, - matrix::Csr *l) GKO_NOT_IMPLEMENTED; + const matrix::Coo* a_lower, + matrix::Csr* l) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PAR_IC_COMPUTE_FACTOR_KERNEL); diff --git a/dpcpp/factorization/par_ict_kernels.dp.cpp b/dpcpp/factorization/par_ict_kernels.dp.cpp index a782dba4af1..4e645502a53 100644 --- a/dpcpp/factorization/par_ict_kernels.dp.cpp +++ b/dpcpp/factorization/par_ict_kernels.dp.cpp @@ -63,9 +63,9 @@ namespace par_ict_factorization { template void compute_factor(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo*) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -74,10 +74,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_candidates(std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/dpcpp/factorization/par_ilu_kernels.dp.cpp b/dpcpp/factorization/par_ilu_kernels.dp.cpp index 4087eed7717..dcc709670e7 100644 --- a/dpcpp/factorization/par_ilu_kernels.dp.cpp +++ b/dpcpp/factorization/par_ilu_kernels.dp.cpp @@ -55,9 +55,9 @@ namespace par_ilu_factorization { template void compute_l_u_factors( std::shared_ptr exec, size_type iterations, - const matrix::Coo *system_matrix, - matrix::Csr *l_factor, - matrix::Csr *u_factor) GKO_NOT_IMPLEMENTED; + const matrix::Coo* system_matrix, + 
matrix::Csr* l_factor, + matrix::Csr* u_factor) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PAR_ILU_COMPUTE_L_U_FACTORS_KERNEL); diff --git a/dpcpp/factorization/par_ilut_kernels.dp.cpp b/dpcpp/factorization/par_ilut_kernels.dp.cpp index 5b224275827..7277f1eb828 100644 --- a/dpcpp/factorization/par_ilut_kernels.dp.cpp +++ b/dpcpp/factorization/par_ilut_kernels.dp.cpp @@ -67,10 +67,10 @@ namespace par_ilut_factorization { template void threshold_select(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - Array> &, - remove_complex &threshold) GKO_NOT_IMPLEMENTED; + const matrix::Csr* m, + IndexType rank, Array& tmp, + Array>&, + remove_complex& threshold) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PAR_ILUT_THRESHOLD_SELECT_KERNEL); @@ -84,18 +84,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void abstract_filter(std::shared_ptr exec, - const matrix::Csr *m, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, + const matrix::Csr* m, + matrix::Csr* m_out, + matrix::Coo* m_out_coo, Predicate pred) GKO_NOT_IMPLEMENTED; template void threshold_filter(std::shared_ptr exec, - const matrix::Csr *m, + const matrix::Csr* m, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -109,10 +109,10 @@ constexpr auto sample_size = bucket_count * sampleselect_oversampling; template void threshold_filter_approx( std::shared_ptr exec, - const matrix::Csr *m, IndexType rank, - Array &tmp, remove_complex &threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo) GKO_NOT_IMPLEMENTED; + const matrix::Csr* m, IndexType rank, + Array& tmp, remove_complex& threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_PAR_ILUT_THRESHOLD_FILTER_APPROX_KERNEL); @@ 
-120,12 +120,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_l_u_factors(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *, - matrix::Csr *u, - const matrix::Coo *, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo*, + matrix::Csr* u, + const matrix::Coo*, + matrix::Csr* u_csc) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -134,12 +134,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_candidates(std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, + const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/dpcpp/matrix/coo_kernels.dp.cpp b/dpcpp/matrix/coo_kernels.dp.cpp index 561a0cddf8f..3d5ea25722d 100644 --- a/dpcpp/matrix/coo_kernels.dp.cpp +++ b/dpcpp/matrix/coo_kernels.dp.cpp @@ -99,11 +99,11 @@ namespace { template void spmv_kernel(const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType *__restrict__ row, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, Closure scale, sycl::nd_item<3> item_ct1) { ValueType temp_val = zero(); @@ -155,32 +155,32 @@ void spmv_kernel(const size_type nnz, const size_type num_lines, template void abstract_spmv(const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType 
*__restrict__ row, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { spmv_kernel( nnz, num_lines, val, col, row, b, b_stride, c, c_stride, - [](const ValueType &x) { return x; }, item_ct1); + [](const ValueType& x) { return x; }, item_ct1); } template void abstract_spmv(const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType *__restrict__ row, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { ValueType scale_factor = alpha[0]; spmv_kernel( nnz, num_lines, val, col, row, b, b_stride, c, c_stride, - [&scale_factor](const ValueType &x) { return scale_factor * x; }, + [&scale_factor](const ValueType& x) { return scale_factor * x; }, item_ct1); } @@ -208,11 +208,11 @@ GKO_ENABLE_DEFAULT_HOST(abstract_spmv, abstract_spmv); */ template void spmm_kernel(const size_type nnz, const size_type num_elems, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType *__restrict__ row, const size_type num_cols, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, const size_type num_cols, 
+ const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, Closure scale, sycl::nd_item<3> item_ct1) { ValueType temp = zero(); @@ -246,32 +246,32 @@ void spmm_kernel(const size_type nnz, const size_type num_elems, template void abstract_spmm(const size_type nnz, const size_type num_elems, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType *__restrict__ row, const size_type num_cols, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, const size_type num_cols, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { spmm_kernel( nnz, num_elems, val, col, row, num_cols, b, b_stride, c, c_stride, - [](const ValueType &x) { return x; }, item_ct1); + [](const ValueType& x) { return x; }, item_ct1); } template void abstract_spmm(const size_type nnz, const size_type num_elems, - const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col, - const IndexType *__restrict__ row, const size_type num_cols, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col, + const IndexType* __restrict__ row, const size_type num_cols, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { ValueType scale_factor = alpha[0]; spmm_kernel( nnz, num_elems, val, col, row, num_cols, b, b_stride, c, c_stride, - [&scale_factor](const ValueType &x) { return scale_factor * x; }, + [&scale_factor](const ValueType& x) { return scale_factor 
* x; }, item_ct1); } @@ -285,9 +285,9 @@ namespace kernel { template -void convert_row_idxs_to_ptrs(const IndexType *__restrict__ idxs, +void convert_row_idxs_to_ptrs(const IndexType* __restrict__ idxs, size_type num_nonzeros, - IndexType *__restrict__ ptrs, size_type length, + IndexType* __restrict__ ptrs, size_type length, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -310,11 +310,11 @@ void convert_row_idxs_to_ptrs(const IndexType *__restrict__ idxs, template void convert_row_idxs_to_ptrs(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const IndexType *idxs, - size_type num_nonzeros, IndexType *ptrs, + sycl::queue* queue, const IndexType* idxs, + size_type num_nonzeros, IndexType* ptrs, size_type length) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { convert_row_idxs_to_ptrs(idxs, num_nonzeros, ptrs, @@ -323,16 +323,16 @@ void convert_row_idxs_to_ptrs(dim3 grid, dim3 block, }); } -template void convert_row_idxs_to_ptrs(dim3, dim3, size_type, sycl::queue *, - const int32 *idxs, size_type, int32 *, +template void convert_row_idxs_to_ptrs(dim3, dim3, size_type, sycl::queue*, + const int32* idxs, size_type, int32*, size_type); -template void convert_row_idxs_to_ptrs(dim3, dim3, size_type, sycl::queue *, - const int64 *idxs, size_type, int64 *, +template void convert_row_idxs_to_ptrs(dim3, dim3, size_type, sycl::queue*, + const int64* idxs, size_type, int64*, size_type); template void initialize_zero_dense(size_type num_rows, size_type num_cols, - size_type stride, ValueType *__restrict__ result, + size_type stride, ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto tidx_x = @@ -350,10 +350,10 @@ GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); template -void fill_in_dense(size_type nnz, const IndexType *__restrict__ row_idxs, - const 
IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, size_type stride, - ValueType *__restrict__ result, sycl::nd_item<3> item_ct1) +void fill_in_dense(size_type nnz, const IndexType* __restrict__ row_idxs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, size_type stride, + ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < nnz) { @@ -369,8 +369,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); template void spmv(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -381,11 +381,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -397,8 +397,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { const auto nnz = a->get_num_stored_elements(); const auto b_ncols = b->get_size()[1]; @@ -433,10 +433,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto nnz = a->get_num_stored_elements(); const auto nwarps = 
host_kernel::calculate_nwarps(exec, nnz); @@ -473,8 +473,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_row_idxs_to_ptrs(std::shared_ptr exec, - const IndexType *idxs, size_type num_nonzeros, - IndexType *ptrs, size_type length) + const IndexType* idxs, size_type num_nonzeros, + IndexType* ptrs, size_type length) { const auto grid_dim = ceildiv(num_nonzeros, default_block_size); @@ -486,8 +486,8 @@ void convert_row_idxs_to_ptrs(std::shared_ptr exec, template void convert_to_csr(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Csr *result) + const matrix::Coo* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -506,8 +506,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Dense *result) + const matrix::Coo* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 3bbd03de871..7435ff35221 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -106,7 +106,7 @@ __dpct_inline__ T ceildivT(T nom, T denom) template __dpct_inline__ bool block_segment_scan_reverse( - const IndexType *__restrict__ ind, ValueType *__restrict__ val, + const IndexType* __restrict__ ind, ValueType* __restrict__ val, sycl::nd_item<3> item_ct1) { bool last = true; @@ -134,9 +134,9 @@ __dpct_inline__ bool block_segment_scan_reverse( template __dpct_inline__ void find_next_row( const IndexType num_rows, const IndexType data_size, const IndexType ind, - IndexType *__restrict__ row, IndexType *__restrict__ row_end, + IndexType* __restrict__ row, IndexType* __restrict__ row_end, const IndexType row_predict, const IndexType row_predict_end, - const IndexType *__restrict__ row_ptr) + const IndexType* __restrict__ row_ptr) { if (!overflow || ind < data_size) { if (ind >= 
*row_end) { @@ -157,8 +157,8 @@ __dpct_inline__ void find_next_row( template __dpct_inline__ void warp_atomic_add( - const group::thread_block_tile &group, bool force_write, - ValueType *__restrict__ val, const IndexType row, ValueType *__restrict__ c, + const group::thread_block_tile& group, bool force_write, + ValueType* __restrict__ val, const IndexType row, ValueType* __restrict__ c, const size_type c_stride, const IndexType column_id, Closure scale) { // do a local scan to avoid atomic collisions @@ -175,14 +175,14 @@ __dpct_inline__ void warp_atomic_add( template __dpct_inline__ void process_window( - const group::thread_block_tile &group, + const group::thread_block_tile& group, const IndexType num_rows, const IndexType data_size, const IndexType ind, - IndexType *__restrict__ row, IndexType *__restrict__ row_end, - IndexType *__restrict__ nrow, IndexType *__restrict__ nrow_end, - ValueType *__restrict__ temp_val, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + IndexType* __restrict__ row, IndexType* __restrict__ row_end, + IndexType* __restrict__ nrow, IndexType* __restrict__ nrow_end, + ValueType* __restrict__ temp_val, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, const IndexType column_id, Closure scale) { const IndexType curr_row = *row; @@ -216,10 +216,10 @@ __dpct_inline__ IndexType get_warp_start_idx(const IndexType nwarps, template __dpct_inline__ void spmv_kernel( const IndexType nwarps, const IndexType num_rows, - const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const 
size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, Closure scale, + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, Closure scale, sycl::nd_item<3> item_ct1) { const IndexType warp_idx = @@ -261,35 +261,35 @@ __dpct_inline__ void spmv_kernel( template void abstract_spmv(const IndexType nwarps, const IndexType num_rows, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { spmv_kernel( nwarps, num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, - c_stride, [](const ValueType &x) { return x; }, item_ct1); + c_stride, [](const ValueType& x) { return x; }, item_ct1); } template void abstract_spmv(const IndexType nwarps, const IndexType num_rows, - const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + 
ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { ValueType scale_factor = alpha[0]; spmv_kernel( nwarps, num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, - [&scale_factor](const ValueType &x) { return scale_factor * x; }, + [&scale_factor](const ValueType& x) { return scale_factor * x; }, item_ct1); } @@ -299,8 +299,8 @@ GKO_ENABLE_DEFAULT_HOST(abstract_spmv, abstract_spmv); template __dpct_inline__ void merge_path_search( const IndexType diagonal, const IndexType a_len, const IndexType b_len, - const IndexType *__restrict__ a, const IndexType offset_b, - IndexType *__restrict__ x, IndexType *__restrict__ y) + const IndexType* __restrict__ a, const IndexType offset_b, + IndexType* __restrict__ x, IndexType* __restrict__ y) { auto x_min = max(diagonal - b_len, zero()); auto x_max = min(diagonal, a_len); @@ -320,12 +320,12 @@ __dpct_inline__ void merge_path_search( template void merge_path_reduce(const IndexType nwarps, - const ValueType *__restrict__ last_val, - const IndexType *__restrict__ last_row, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ last_val, + const IndexType* __restrict__ last_row, + ValueType* __restrict__ c, const size_type c_stride, Alpha_op alpha_op, sycl::nd_item<3> item_ct1, - UninitializedArray &tmp_ind, - UninitializedArray &tmp_val) + UninitializedArray& tmp_ind, + UninitializedArray& tmp_val) { const IndexType cache_lines = ceildivT(nwarps, spmv_block_size); const IndexType tid = item_ct1.get_local_id(2); @@ -352,8 +352,8 @@ void merge_path_reduce(const IndexType nwarps, tmp_ind[item_ct1.get_local_id(2)] = row; group::this_thread_block(item_ct1).sync(); bool last = - block_segment_scan_reverse(static_cast(tmp_ind), - static_cast(tmp_val), item_ct1); + block_segment_scan_reverse(static_cast(tmp_ind), + static_cast(tmp_val), item_ct1); group::this_thread_block(item_ct1).sync(); if (last) { c[row * c_stride] += 
alpha_op(tmp_val[item_ct1.get_local_id(2)]); @@ -364,18 +364,18 @@ void merge_path_reduce(const IndexType nwarps, template void merge_path_spmv(const IndexType num_rows, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, - IndexType *__restrict__ row_out, - ValueType *__restrict__ val_out, Alpha_op alpha_op, + const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, + IndexType* __restrict__ row_out, + ValueType* __restrict__ val_out, Alpha_op alpha_op, Beta_op beta_op, sycl::nd_item<3> item_ct1, - IndexType *shared_row_ptrs) + IndexType* shared_row_ptrs) { - const auto *row_end_ptrs = row_ptrs + 1; + const auto* row_end_ptrs = row_ptrs + 1; const auto nnz = row_ptrs[num_rows]; const IndexType num_merge_items = num_rows + nnz; const auto block_items = spmv_block_size * items_per_thread; @@ -426,9 +426,9 @@ void merge_path_spmv(const IndexType num_rows, } } group::this_thread_block(item_ct1).sync(); - IndexType *tmp_ind = shared_row_ptrs; - ValueType *tmp_val = - reinterpret_cast(shared_row_ptrs + spmv_block_size); + IndexType* tmp_ind = shared_row_ptrs; + ValueType* tmp_val = + reinterpret_cast(shared_row_ptrs + spmv_block_size); tmp_val[item_ct1.get_local_id(2)] = value; tmp_ind[item_ct1.get_local_id(2)] = row_i; group::this_thread_block(item_ct1).sync(); @@ -443,32 +443,32 @@ void merge_path_spmv(const IndexType num_rows, template void abstract_merge_path_spmv( - const IndexType num_rows, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType 
*__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - ValueType *__restrict__ c, const size_type c_stride, - IndexType *__restrict__ row_out, ValueType *__restrict__ val_out, - sycl::nd_item<3> item_ct1, IndexType *shared_row_ptrs) + const IndexType num_rows, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + ValueType* __restrict__ c, const size_type c_stride, + IndexType* __restrict__ row_out, ValueType* __restrict__ val_out, + sycl::nd_item<3> item_ct1, IndexType* shared_row_ptrs) { merge_path_spmv( num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, - row_out, val_out, [](ValueType &x) { return x; }, - [](ValueType &x) { return zero(); }, item_ct1, + row_out, val_out, [](ValueType& x) { return x; }, + [](ValueType& x) { return zero(); }, item_ct1, shared_row_ptrs); } template void abstract_merge_path_spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const IndexType num_rows, - const ValueType *val, const IndexType *col_idxs, - const IndexType *row_ptrs, const IndexType *srow, - const ValueType *b, const size_type b_stride, - ValueType *c, const size_type c_stride, - IndexType *row_out, ValueType *val_out) -{ - queue->submit([&](sycl::handler &cgh) { + sycl::queue* queue, const IndexType num_rows, + const ValueType* val, const IndexType* col_idxs, + const IndexType* row_ptrs, const IndexType* srow, + const ValueType* b, const size_type b_stride, + ValueType* c, const size_type c_stride, + IndexType* row_out, ValueType* val_out) +{ + queue->submit([&](sycl::handler& cgh) { sycl::accessor shared_row_ptrs_acc_ct1( @@ -479,7 +479,7 @@ void abstract_merge_path_spmv(dim3 grid, dim3 block, abstract_merge_path_spmv( num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, row_out, val_out, item_ct1, - static_cast( + 
static_cast( shared_row_ptrs_acc_ct1.get_pointer())); }); }); @@ -488,34 +488,34 @@ void abstract_merge_path_spmv(dim3 grid, dim3 block, template void abstract_merge_path_spmv( - const IndexType num_rows, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const IndexType *__restrict__ srow, - const ValueType *__restrict__ b, const size_type b_stride, - const ValueType *__restrict__ beta, ValueType *__restrict__ c, - const size_type c_stride, IndexType *__restrict__ row_out, - ValueType *__restrict__ val_out, sycl::nd_item<3> item_ct1, - IndexType *shared_row_ptrs) + const IndexType num_rows, const ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, + const ValueType* __restrict__ b, const size_type b_stride, + const ValueType* __restrict__ beta, ValueType* __restrict__ c, + const size_type c_stride, IndexType* __restrict__ row_out, + ValueType* __restrict__ val_out, sycl::nd_item<3> item_ct1, + IndexType* shared_row_ptrs) { const auto alpha_val = alpha[0]; const auto beta_val = beta[0]; merge_path_spmv( num_rows, val, col_idxs, row_ptrs, srow, b, b_stride, c, c_stride, - row_out, val_out, [&alpha_val](ValueType &x) { return alpha_val * x; }, - [&beta_val](ValueType &x) { return beta_val * x; }, item_ct1, + row_out, val_out, [&alpha_val](ValueType& x) { return alpha_val * x; }, + [&beta_val](ValueType& x) { return beta_val * x; }, item_ct1, shared_row_ptrs); } template void abstract_merge_path_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, - const IndexType num_rows, const ValueType *alpha, const ValueType *val, - const IndexType *col_idxs, const IndexType *row_ptrs, const IndexType *srow, - const ValueType *b, const size_type b_stride, const ValueType *beta, - ValueType *c, const size_type 
c_stride, IndexType *row_out, - ValueType *val_out) -{ - queue->submit([&](sycl::handler &cgh) { + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + const IndexType num_rows, const ValueType* alpha, const ValueType* val, + const IndexType* col_idxs, const IndexType* row_ptrs, const IndexType* srow, + const ValueType* b, const size_type b_stride, const ValueType* beta, + ValueType* c, const size_type c_stride, IndexType* row_out, + ValueType* val_out) +{ + queue->submit([&](sycl::handler& cgh) { sycl::accessor shared_row_ptrs_acc_ct1( @@ -526,7 +526,7 @@ void abstract_merge_path_spmv( abstract_merge_path_spmv( num_rows, alpha, val, col_idxs, row_ptrs, srow, b, b_stride, beta, c, c_stride, row_out, val_out, item_ct1, - static_cast( + static_cast( shared_row_ptrs_acc_ct1.get_pointer())); }); }); @@ -535,25 +535,25 @@ void abstract_merge_path_spmv( template void abstract_reduce(const IndexType nwarps, - const ValueType *__restrict__ last_val, - const IndexType *__restrict__ last_row, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ last_val, + const IndexType* __restrict__ last_row, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1, - UninitializedArray &tmp_ind, - UninitializedArray &tmp_val) + UninitializedArray& tmp_ind, + UninitializedArray& tmp_val) { merge_path_reduce( - nwarps, last_val, last_row, c, c_stride, [](ValueType &x) { return x; }, + nwarps, last_val, last_row, c, c_stride, [](ValueType& x) { return x; }, item_ct1, tmp_ind, tmp_val); } template void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const IndexType nwarps, - const ValueType *last_val, const IndexType *last_row, - ValueType *c, const size_type c_stride) + sycl::queue* queue, const IndexType nwarps, + const ValueType* last_val, const IndexType* last_row, + ValueType* c, const size_type c_stride) { - queue->submit([&](sycl::handler &cgh) { + 
queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -575,29 +575,29 @@ void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void abstract_reduce(const IndexType nwarps, - const ValueType *__restrict__ last_val, - const IndexType *__restrict__ last_row, - const ValueType *__restrict__ alpha, - ValueType *__restrict__ c, const size_type c_stride, + const ValueType* __restrict__ last_val, + const IndexType* __restrict__ last_row, + const ValueType* __restrict__ alpha, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1, - UninitializedArray &tmp_ind, - UninitializedArray &tmp_val) + UninitializedArray& tmp_ind, + UninitializedArray& tmp_val) { const auto alpha_val = alpha[0]; merge_path_reduce( nwarps, last_val, last_row, c, c_stride, - [&alpha_val](ValueType &x) { return alpha_val * x; }, item_ct1, tmp_ind, + [&alpha_val](ValueType& x) { return alpha_val * x; }, item_ct1, tmp_ind, tmp_val); } template void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const IndexType nwarps, - const ValueType *last_val, const IndexType *last_row, - const ValueType *alpha, ValueType *c, + sycl::queue* queue, const IndexType nwarps, + const ValueType* last_val, const IndexType* last_row, + const ValueType* alpha, ValueType* c, const size_type c_stride) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -620,11 +620,11 @@ void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void device_classical_spmv(const size_type num_rows, - const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, - const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const ValueType* 
__restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, + const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, Closure scale, sycl::nd_item<3> item_ct1) { @@ -643,7 +643,7 @@ void device_classical_spmv(const size_type num_rows, } auto subgroup_result = ::gko::kernels::dpcpp::reduce( subgroup_tile, temp_val, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); // TODO: check the barrier subgroup_tile.sync(); if (subid == 0) { @@ -656,27 +656,27 @@ void device_classical_spmv(const size_type num_rows, template void abstract_classical_spmv( - const size_type num_rows, const ValueType *__restrict__ val, - const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, - const size_type b_stride, ValueType *__restrict__ c, + const size_type num_rows, const ValueType* __restrict__ val, + const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const ValueType* __restrict__ b, + const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { device_classical_spmv( num_rows, val, col_idxs, row_ptrs, b, b_stride, c, c_stride, - [](const ValueType &x, const ValueType &y) { return x; }, item_ct1); + [](const ValueType& x, const ValueType& y) { return x; }, item_ct1); } template void abstract_classical_spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type num_rows, - const ValueType *val, const IndexType *col_idxs, - const IndexType *row_ptrs, const ValueType *b, - const size_type b_stride, ValueType *c, + sycl::queue* queue, const size_type num_rows, + const ValueType* val, const IndexType* col_idxs, + const IndexType* row_ptrs, const ValueType* b, + const size_type b_stride, ValueType* c, const size_type c_stride) { - 
queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { abstract_classical_spmv(num_rows, val, col_idxs, @@ -689,18 +689,18 @@ void abstract_classical_spmv(dim3 grid, dim3 block, template void abstract_classical_spmv( - const size_type num_rows, const ValueType *__restrict__ alpha, - const ValueType *__restrict__ val, const IndexType *__restrict__ col_idxs, - const IndexType *__restrict__ row_ptrs, const ValueType *__restrict__ b, - const size_type b_stride, const ValueType *__restrict__ beta, - ValueType *__restrict__ c, const size_type c_stride, + const size_type num_rows, const ValueType* __restrict__ alpha, + const ValueType* __restrict__ val, const IndexType* __restrict__ col_idxs, + const IndexType* __restrict__ row_ptrs, const ValueType* __restrict__ b, + const size_type b_stride, const ValueType* __restrict__ beta, + ValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1) { const auto alpha_val = alpha[0]; const auto beta_val = beta[0]; device_classical_spmv( num_rows, val, col_idxs, row_ptrs, b, b_stride, c, c_stride, - [&alpha_val, &beta_val](const ValueType &x, const ValueType &y) { + [&alpha_val, &beta_val](const ValueType& x, const ValueType& y) { return alpha_val * x + beta_val * y; }, item_ct1); @@ -709,14 +709,14 @@ void abstract_classical_spmv( template void abstract_classical_spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type num_rows, - const ValueType *alpha, const ValueType *val, - const IndexType *col_idxs, - const IndexType *row_ptrs, const ValueType *b, - const size_type b_stride, const ValueType *beta, - ValueType *c, const size_type c_stride) -{ - queue->submit([&](sycl::handler &cgh) { + sycl::queue* queue, const size_type num_rows, + const ValueType* alpha, const ValueType* val, + const IndexType* col_idxs, + const IndexType* row_ptrs, const ValueType* b, + const 
size_type b_stride, const ValueType* beta, + ValueType* c, const size_type c_stride) +{ + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { abstract_classical_spmv( @@ -729,8 +729,8 @@ void abstract_classical_spmv(dim3 grid, dim3 block, template void convert_row_ptrs_to_idxs(size_type num_rows, - const IndexType *__restrict__ ptrs, - IndexType *__restrict__ idxs, + const IndexType* __restrict__ ptrs, + IndexType* __restrict__ idxs, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -746,7 +746,7 @@ GKO_ENABLE_DEFAULT_HOST(convert_row_ptrs_to_idxs, convert_row_ptrs_to_idxs); template void initialize_zero_dense(size_type num_rows, size_type num_cols, - size_type stride, ValueType *__restrict__ result, + size_type stride, ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto tidx_x = @@ -764,10 +764,10 @@ GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); template -void fill_in_dense(size_type num_rows, const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, size_type stride, - ValueType *__restrict__ result, sycl::nd_item<3> item_ct1) +void fill_in_dense(size_type num_rows, const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, size_type stride, + ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num_rows) { @@ -782,8 +782,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); template void calculate_nnz_per_row(size_type num_rows, - const IndexType *__restrict__ row_ptrs, - size_type *__restrict__ nnz_per_row, + const IndexType* __restrict__ row_ptrs, + size_type* __restrict__ nnz_per_row, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -797,9 +797,9 @@ 
GKO_ENABLE_DEFAULT_HOST(calculate_nnz_per_row, calculate_nnz_per_row); void calculate_slice_lengths(size_type num_rows, size_type slice_size, size_type stride_factor, - const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ slice_lengths, - size_type *__restrict__ slice_sets, + const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ slice_lengths, + size_type* __restrict__ slice_sets, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -819,7 +819,7 @@ void calculate_slice_lengths(size_type num_rows, size_type slice_size, group::this_thread_block(item_ct1)); auto warp_result = ::gko::kernels::dpcpp::reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0) { auto slice_length = @@ -835,13 +835,13 @@ GKO_ENABLE_DEFAULT_HOST(calculate_slice_lengths, calculate_slice_lengths); template void fill_in_sellp(size_type num_rows, size_type slice_size, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_row_ptrs, - const IndexType *__restrict__ source_col_idxs, - size_type *__restrict__ slice_lengths, - size_type *__restrict__ slice_sets, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values, + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + size_type* __restrict__ slice_lengths, + size_type* __restrict__ slice_sets, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values, sycl::nd_item<3> item_ct1) { const auto global_row = thread::get_thread_id_flat(item_ct1); @@ -872,8 +872,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_sellp, fill_in_sellp); template void initialize_zero_ell(size_type max_nnz_per_row, size_type stride, - ValueType *__restrict__ values, - IndexType *__restrict__ col_idxs, + ValueType* __restrict__ 
values, + IndexType* __restrict__ col_idxs, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -889,11 +889,11 @@ GKO_ENABLE_DEFAULT_HOST(initialize_zero_ell, initialize_zero_ell); template void fill_in_ell(size_type num_rows, size_type stride, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_row_ptrs, - const IndexType *__restrict__ source_col_idxs, - ValueType *__restrict__ result_values, - IndexType *__restrict__ result_col_idxs, + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + ValueType* __restrict__ result_values, + IndexType* __restrict__ result_col_idxs, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -917,8 +917,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_ell, fill_in_ell); void reduce_max_nnz_per_slice(size_type num_rows, size_type slice_size, size_type stride_factor, - const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ result, + const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ result, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -937,7 +937,7 @@ void reduce_max_nnz_per_slice(size_type num_rows, size_type slice_size, } auto warp_result = ::gko::kernels::dpcpp::reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0 && warpid < slice_num) { result[warpid] = ceildiv(warp_result, stride_factor) * stride_factor; @@ -948,12 +948,12 @@ GKO_ENABLE_DEFAULT_HOST(reduce_max_nnz_per_slice, reduce_max_nnz_per_slice); void reduce_total_cols(size_type num_slices, - const size_type *__restrict__ max_nnz_per_slice, - size_type *__restrict__ result, - sycl::nd_item<3> item_ct1, size_type *block_result) + const size_type* __restrict__ max_nnz_per_slice, + size_type* __restrict__ 
result, + sycl::nd_item<3> item_ct1, size_type* block_result) { reduce_array(num_slices, max_nnz_per_slice, block_result, item_ct1, - [](const size_type &x, const size_type &y) { return x + y; }); + [](const size_type& x, const size_type& y) { return x + y; }); if (item_ct1.get_local_id(2) == 0) { result[item_ct1.get_group(2)] = block_result[0]; @@ -961,10 +961,10 @@ void reduce_total_cols(size_type num_slices, } void reduce_total_cols(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_slices, - const size_type *max_nnz_per_slice, size_type *result) + sycl::queue* queue, size_type num_slices, + const size_type* max_nnz_per_slice, size_type* result) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor block_result_acc_ct1(sycl::range<1>(default_block_size), cgh); @@ -978,13 +978,13 @@ void reduce_total_cols(dim3 grid, dim3 block, size_type dynamic_shared_memory, } -void reduce_max_nnz(size_type size, const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ result, sycl::nd_item<3> item_ct1, - size_type *block_max) +void reduce_max_nnz(size_type size, const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ result, sycl::nd_item<3> item_ct1, + size_type* block_max) { reduce_array( size, nnz_per_row, block_max, item_ct1, - [](const size_type &x, const size_type &y) { return max(x, y); }); + [](const size_type& x, const size_type& y) { return max(x, y); }); if (item_ct1.get_local_id(2) == 0) { result[item_ct1.get_group(2)] = block_max[0]; @@ -992,10 +992,10 @@ void reduce_max_nnz(size_type size, const size_type *__restrict__ nnz_per_row, } void reduce_max_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type size, - const size_type *nnz_per_row, size_type *result) + sycl::queue* queue, size_type size, + const size_type* nnz_per_row, size_type* result) { - queue->submit([&](sycl::handler &cgh) { + 
queue->submit([&](sycl::handler& cgh) { sycl::accessor block_max_acc_ct1(sycl::range<1>(default_block_size), cgh); @@ -1012,8 +1012,8 @@ void reduce_max_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void calculate_hybrid_coo_row_nnz(size_type num_rows, size_type ell_max_nnz_per_row, - IndexType *__restrict__ csr_row_idxs, - size_type *__restrict__ coo_row_nnz, + IndexType* __restrict__ csr_row_idxs, + size_type* __restrict__ coo_row_nnz, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -1031,15 +1031,15 @@ GKO_ENABLE_DEFAULT_HOST(calculate_hybrid_coo_row_nnz, template void fill_in_hybrid(size_type num_rows, size_type stride, size_type ell_max_nnz_per_row, - const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_row_ptrs, - const IndexType *__restrict__ source_col_idxs, - const size_type *__restrict__ coo_offset, - ValueType *__restrict__ result_ell_val, - IndexType *__restrict__ result_ell_col, - ValueType *__restrict__ result_coo_val, - IndexType *__restrict__ result_coo_col, - IndexType *__restrict__ result_coo_row, + const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_row_ptrs, + const IndexType* __restrict__ source_col_idxs, + const size_type* __restrict__ coo_offset, + ValueType* __restrict__ result_ell_val, + IndexType* __restrict__ result_ell_col, + ValueType* __restrict__ result_coo_val, + IndexType* __restrict__ result_coo_col, + IndexType* __restrict__ result_coo_row, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -1070,9 +1070,9 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_hybrid, fill_in_hybrid); template -void check_unsorted(const IndexType *__restrict__ row_ptrs, - const IndexType *__restrict__ col_idxs, IndexType num_rows, - bool *flag, sycl::nd_item<3> item_ct1, bool *sh_flag) +void check_unsorted(const IndexType* __restrict__ row_ptrs, + const IndexType* __restrict__ col_idxs, IndexType num_rows, + bool* 
flag, sycl::nd_item<3> item_ct1, bool* sh_flag) { auto block = group::this_thread_block(item_ct1); if (block.thread_rank() == 0) { @@ -1099,10 +1099,10 @@ void check_unsorted(const IndexType *__restrict__ row_ptrs, template void check_unsorted(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const IndexType *row_ptrs, - const IndexType *col_idxs, IndexType num_rows, bool *flag) + sycl::queue* queue, const IndexType* row_ptrs, + const IndexType* col_idxs, IndexType num_rows, bool* flag) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor sh_flag_acc_ct1(cgh); @@ -1118,10 +1118,10 @@ void check_unsorted(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void extract_diagonal(size_type diag_size, size_type nnz, - const ValueType *__restrict__ orig_values, - const IndexType *__restrict__ orig_row_ptrs, - const IndexType *__restrict__ orig_col_idxs, - ValueType *__restrict__ diag, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ orig_values, + const IndexType* __restrict__ orig_row_ptrs, + const IndexType* __restrict__ orig_col_idxs, + ValueType* __restrict__ diag, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; const auto row = thread::get_subwarp_id_flat(item_ct1); @@ -1149,9 +1149,9 @@ GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal); template void row_ptr_permute_kernel(size_type num_rows, - const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - IndexType *__restrict__ out_nnz, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + IndexType* __restrict__ out_nnz, sycl::nd_item<3> item_ct1) { auto tid = thread::get_thread_id_flat(item_ct1); @@ -1168,9 +1168,9 @@ GKO_ENABLE_DEFAULT_HOST(row_ptr_permute_kernel, row_ptr_permute_kernel); template void inv_row_ptr_permute_kernel(size_type num_rows, - const IndexType *__restrict__ permutation, - const IndexType 
*__restrict__ in_row_ptrs, - IndexType *__restrict__ out_nnz, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + IndexType* __restrict__ out_nnz, sycl::nd_item<3> item_ct1) { auto tid = thread::get_thread_id_flat(item_ct1); @@ -1187,13 +1187,13 @@ GKO_ENABLE_DEFAULT_HOST(inv_row_ptr_permute_kernel, inv_row_ptr_permute_kernel); template void row_permute_kernel(size_type num_rows, - const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - const IndexType *__restrict__ in_cols, - const ValueType *__restrict__ in_vals, - const IndexType *__restrict__ out_row_ptrs, - IndexType *__restrict__ out_cols, - ValueType *__restrict__ out_vals, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, sycl::nd_item<3> item_ct1) { auto tid = thread::get_subwarp_id_flat(item_ct1); @@ -1214,13 +1214,13 @@ void row_permute_kernel(size_type num_rows, template void row_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, - const IndexType *permutation, - const IndexType *in_row_ptrs, const IndexType *in_cols, - const ValueType *in_vals, const IndexType *out_row_ptrs, - IndexType *out_cols, ValueType *out_vals) + sycl::queue* queue, size_type num_rows, + const IndexType* permutation, + const IndexType* in_row_ptrs, const IndexType* in_cols, + const ValueType* in_vals, const IndexType* out_row_ptrs, + IndexType* out_cols, ValueType* out_vals) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { row_permute_kernel( @@ -1233,13 +1233,13 @@ void row_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, 
template void inv_row_permute_kernel(size_type num_rows, - const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - const IndexType *__restrict__ in_cols, - const ValueType *__restrict__ in_vals, - const IndexType *__restrict__ out_row_ptrs, - IndexType *__restrict__ out_cols, - ValueType *__restrict__ out_vals, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, sycl::nd_item<3> item_ct1) { auto tid = thread::get_subwarp_id_flat(item_ct1); @@ -1260,14 +1260,14 @@ void inv_row_permute_kernel(size_type num_rows, template void inv_row_permute_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue *queue, - size_type num_rows, const IndexType *permutation, - const IndexType *in_row_ptrs, - const IndexType *in_cols, const ValueType *in_vals, - const IndexType *out_row_ptrs, IndexType *out_cols, - ValueType *out_vals) -{ - queue->submit([&](sycl::handler &cgh) { + size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const IndexType* permutation, + const IndexType* in_row_ptrs, + const IndexType* in_cols, const ValueType* in_vals, + const IndexType* out_row_ptrs, IndexType* out_cols, + ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { inv_row_permute_kernel( @@ -1280,13 +1280,13 @@ void inv_row_permute_kernel(dim3 grid, dim3 block, template void inv_symm_permute_kernel(size_type num_rows, - const IndexType *__restrict__ permutation, - const IndexType *__restrict__ in_row_ptrs, - const IndexType *__restrict__ in_cols, - const ValueType *__restrict__ in_vals, - const IndexType *__restrict__ out_row_ptrs, - IndexType *__restrict__ out_cols, - ValueType *__restrict__ 
out_vals, + const IndexType* __restrict__ permutation, + const IndexType* __restrict__ in_row_ptrs, + const IndexType* __restrict__ in_cols, + const ValueType* __restrict__ in_vals, + const IndexType* __restrict__ out_row_ptrs, + IndexType* __restrict__ out_cols, + ValueType* __restrict__ out_vals, sycl::nd_item<3> item_ct1) { auto tid = thread::get_subwarp_id_flat(item_ct1); @@ -1308,14 +1308,14 @@ void inv_symm_permute_kernel(size_type num_rows, template void inv_symm_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, - const IndexType *permutation, - const IndexType *in_row_ptrs, - const IndexType *in_cols, const ValueType *in_vals, - const IndexType *out_row_ptrs, IndexType *out_cols, - ValueType *out_vals) -{ - queue->submit([&](sycl::handler &cgh) { + sycl::queue* queue, size_type num_rows, + const IndexType* permutation, + const IndexType* in_row_ptrs, + const IndexType* in_cols, const ValueType* in_vals, + const IndexType* out_row_ptrs, IndexType* out_cols, + ValueType* out_vals) +{ + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { inv_symm_permute_kernel( @@ -1331,11 +1331,11 @@ namespace host_kernel { template void merge_path_spmv(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Csr* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); const IndexType grid_num = @@ -1401,11 +1401,11 @@ int compute_items_per_thread(std::shared_ptr exec) template void classical_spmv(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const 
matrix::Dense *beta = nullptr) + const matrix::Csr* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { constexpr int threads_per_cu = 7; const auto num_subgroup = @@ -1443,8 +1443,8 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template void spmv(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Csr* a, + const matrix::Dense* b, matrix::Dense* c) { if (a->get_strategy()->get_name() == "load_balance") { components::fill_array(exec, c->get_values(), @@ -1499,20 +1499,20 @@ void spmv(std::shared_ptr exec, oneapi::mkl::sparse::set_csr_data( mat_handle, IndexType(a->get_size()[0]), IndexType(a->get_size()[1]), oneapi::mkl::index_base::zero, - const_cast(a->get_const_row_ptrs()), - const_cast(a->get_const_col_idxs()), - const_cast(a->get_const_values())); + const_cast(a->get_const_row_ptrs()), + const_cast(a->get_const_col_idxs()), + const_cast(a->get_const_values())); if (b->get_size()[1] == 1 && b->get_stride() == 1) { oneapi::mkl::sparse::gemv( *exec->get_queue(), oneapi::mkl::transpose::nontrans, one(), mat_handle, - const_cast(b->get_const_values()), + const_cast(b->get_const_values()), zero(), c->get_values()); } else { oneapi::mkl::sparse::gemm( *exec->get_queue(), oneapi::mkl::transpose::nontrans, one(), mat_handle, - const_cast(b->get_const_values()), + const_cast(b->get_const_values()), b->get_size()[1], b->get_stride(), zero(), c->get_values(), c->get_stride()); } @@ -1530,11 +1530,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { if 
(a->get_strategy()->get_name() == "load_balance") { dense::scale(exec, beta, c); @@ -1563,21 +1563,21 @@ void advanced_spmv(std::shared_ptr exec, oneapi::mkl::sparse::set_csr_data( mat_handle, IndexType(a->get_size()[0]), IndexType(a->get_size()[1]), oneapi::mkl::index_base::zero, - const_cast(a->get_const_row_ptrs()), - const_cast(a->get_const_col_idxs()), - const_cast(a->get_const_values())); + const_cast(a->get_const_row_ptrs()), + const_cast(a->get_const_col_idxs()), + const_cast(a->get_const_values())); if (b->get_size()[1] == 1 && b->get_stride() == 1) { oneapi::mkl::sparse::gemv( *exec->get_queue(), oneapi::mkl::transpose::nontrans, exec->copy_val_to_host(alpha->get_const_values()), - mat_handle, const_cast(b->get_const_values()), + mat_handle, const_cast(b->get_const_values()), exec->copy_val_to_host(beta->get_const_values()), c->get_values()); } else { oneapi::mkl::sparse::gemm( *exec->get_queue(), oneapi::mkl::transpose::nontrans, exec->copy_val_to_host(alpha->get_const_values()), - mat_handle, const_cast(b->get_const_values()), + mat_handle, const_cast(b->get_const_values()), b->get_size()[1], b->get_stride(), exec->copy_val_to_host(beta->get_const_values()), c->get_values(), c->get_stride()); @@ -1696,7 +1696,7 @@ struct val_heap_element { * val_heap_element */ template -void sift_down(HeapElement *heap, typename HeapElement::index_type idx, +void sift_down(HeapElement* heap, typename HeapElement::index_type idx, typename HeapElement::index_type size) { auto curcol = heap[idx].col; @@ -1751,13 +1751,13 @@ void sift_down(HeapElement *heap, typename HeapElement::index_type idx, template auto spgemm_multiway_merge(size_type row, - const typename HeapElement::index_type *a_row_ptrs, - const typename HeapElement::index_type *a_cols, - const typename HeapElement::value_type *a_vals, - const typename HeapElement::index_type *b_row_ptrs, - const typename HeapElement::index_type *b_cols, - const typename HeapElement::value_type *b_vals, - HeapElement *heap, 
InitCallback init_cb, + const typename HeapElement::index_type* a_row_ptrs, + const typename HeapElement::index_type* a_cols, + const typename HeapElement::value_type* a_vals, + const typename HeapElement::index_type* b_row_ptrs, + const typename HeapElement::index_type* b_cols, + const typename HeapElement::value_type* b_vals, + HeapElement* heap, InitCallback init_cb, StepCallback step_cb, ColCallback col_cb) -> decltype(init_cb(0)) { @@ -1785,8 +1785,8 @@ auto spgemm_multiway_merge(size_type row, for (auto i = (a_size - 2) / 2; i >= 0; --i) { sift_down(heap + a_begin, i, a_size); } - auto &top = heap[a_begin]; - auto &bot = heap[a_end - 1]; + auto& top = heap[a_begin]; + auto& bot = heap[a_end - 1]; auto col = top.col; while (top.col != sentinel) { @@ -1814,9 +1814,9 @@ auto spgemm_multiway_merge(size_type row, template void spgemm(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Csr* a, + const matrix::Csr* b, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; const auto a_row_ptrs = a->get_const_row_ptrs(); @@ -1833,17 +1833,17 @@ void spgemm(std::shared_ptr exec, auto heap = heap_array.get_data(); auto col_heap = - reinterpret_cast *>(heap); + reinterpret_cast*>(heap); // first sweep: count nnz for each row - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); c_row_ptrs[a_row] = spgemm_multiway_merge( a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, col_heap, [](size_type) { return IndexType{}; }, - [](ValueType, IndexType, IndexType &) {}, - [](IndexType, IndexType &nnz) { nnz++; }); + [](ValueType, IndexType, IndexType&) {}, + [](IndexType, IndexType& nnz) { nnz++; }); }); }); @@ -1853,14 +1853,14 @@ void spgemm(std::shared_ptr exec, // second sweep: accumulate non-zeros const auto new_nnz = exec->copy_val_to_host(c_row_ptrs + num_rows); 
matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); auto c_vals = c_vals_array.get_data(); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); spgemm_multiway_merge( @@ -1870,10 +1870,10 @@ void spgemm(std::shared_ptr exec, return std::make_pair(zero(), c_row_ptrs[row]); }, [](ValueType val, IndexType, - std::pair &state) { + std::pair& state) { state.first += val; }, - [&](IndexType col, std::pair &state) { + [&](IndexType col, std::pair& state) { c_col_idxs[state.second] = col; c_vals[state.second] = state.first; state.first = zero(); @@ -1888,12 +1888,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Csr *b, - const matrix::Dense *beta, - const matrix::Csr *d, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Csr* b, + const matrix::Dense* beta, + const matrix::Csr* d, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; const auto a_row_ptrs = a->get_const_row_ptrs(); @@ -1918,10 +1918,10 @@ void advanced_spgemm(std::shared_ptr exec, auto heap = heap_array.get_data(); auto col_heap = - reinterpret_cast *>(heap); + reinterpret_cast*>(heap); // first sweep: count nnz for each row - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); auto d_nz = d_row_ptrs[a_row]; @@ -1930,8 
+1930,8 @@ void advanced_spgemm(std::shared_ptr exec, c_row_ptrs[a_row] = spgemm_multiway_merge( a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, col_heap, [](size_type row) { return IndexType{}; }, - [](ValueType, IndexType, IndexType &) {}, - [&](IndexType col, IndexType &nnz) { + [](ValueType, IndexType, IndexType&) {}, + [&](IndexType col, IndexType& nnz) { // skip smaller elements from d while (d_col <= col) { d_nz++; @@ -1951,15 +1951,15 @@ void advanced_spgemm(std::shared_ptr exec, // second sweep: accumulate non-zeros const auto new_nnz = exec->copy_val_to_host(c_row_ptrs + num_rows); matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); auto c_vals = c_vals_array.get_data(); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); auto d_nz = d_row_ptrs[a_row]; @@ -1977,10 +1977,10 @@ void advanced_spgemm(std::shared_ptr exec, c_row_ptrs[row]); }, [](ValueType val, IndexType, - std::pair &state) { + std::pair& state) { state.first += val; }, - [&](IndexType col, std::pair &state) { + [&](IndexType col, std::pair& state) { // handle smaller elements from d ValueType part_d_val{}; while (d_col <= col) { @@ -2022,11 +2022,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *beta, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* beta, + const matrix::Csr* b, + matrix::Csr* c) { constexpr auto sentinel = 
std::numeric_limits::max(); const auto num_rows = a->get_size()[0]; @@ -2038,7 +2038,7 @@ void spgeam(std::shared_ptr exec, auto queue = exec->get_queue(); // count number of non-zeros per row - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); auto a_idx = a_row_ptrs[row]; @@ -2062,8 +2062,8 @@ void spgeam(std::shared_ptr exec, // second sweep: accumulate non-zeros const auto new_nnz = exec->copy_val_to_host(c_row_ptrs + num_rows); matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_cols = c_col_idxs_array.get_data(); @@ -2075,7 +2075,7 @@ void spgeam(std::shared_ptr exec, const auto beta_vals = beta->get_const_values(); // count number of non-zeros per row - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); auto a_idx = a_row_ptrs[row]; @@ -2107,8 +2107,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) + const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { const auto grid_dim = ceildiv(num_rows, default_block_size); @@ -2119,8 +2119,8 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, template void convert_to_coo(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Coo *result) + const matrix::Csr* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; @@ -2136,8 +2136,8 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Dense *result) + const matrix::Csr* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -2166,8 +2166,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Sellp *result) + const matrix::Csr* source, + matrix::Sellp* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -2224,8 +2224,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Ell *result) + const matrix::Csr* source, + matrix::Ell* result) { const auto source_values = source->get_const_values(); const auto source_row_ptrs = source->get_const_row_ptrs(); @@ -2259,8 +2259,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result, size_type stride_factor, + const matrix::Csr* source, + size_type* result, size_type stride_factor, size_type slice_size) { const auto num_rows = source->get_size()[0]; @@ -2310,8 +2310,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generic_transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { const auto num_rows = orig->get_size()[0]; const auto num_cols = orig->get_size()[1]; @@ -2327,7 +2327,7 @@ void generic_transpose(std::shared_ptr exec, auto out_vals = trans->get_values(); components::fill_array(exec, tmp_counts, num_cols, IndexType{}); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = 
row_ptrs[row]; @@ -2341,7 +2341,7 @@ void generic_transpose(std::shared_ptr exec, components::prefix_sum(exec, tmp_counts, num_cols + 1); exec->copy(num_cols + 1, tmp_counts, out_row_ptrs); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; @@ -2361,8 +2361,8 @@ void generic_transpose(std::shared_ptr exec, template void transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { generic_transpose(exec, orig, trans); } @@ -2372,8 +2372,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { generic_transpose(exec, orig, trans); } @@ -2384,9 +2384,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -2409,9 +2409,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -2434,9 +2434,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* 
row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -2459,8 +2459,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result) + const matrix::Csr* source, + size_type* result) { const auto num_rows = source->get_size()[0]; @@ -2492,8 +2492,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Hybrid *result) + const matrix::Csr* source, + matrix::Hybrid* result) { auto ell_val = result->get_ell_values(); auto ell_col = result->get_ell_col_idxs(); @@ -2533,8 +2533,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Csr *source, - Array *result) + const matrix::Csr* source, + Array* result) { const auto num_rows = source->get_size()[0]; auto row_ptrs = source->get_const_row_ptrs(); @@ -2551,13 +2551,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::Csr *to_sort) + matrix::Csr* to_sort) { const auto num_rows = to_sort->get_size()[0]; const auto row_ptrs = to_sort->get_const_row_ptrs(); auto cols = to_sort->get_col_idxs(); auto vals = to_sort->get_values(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; @@ -2610,14 +2610,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr *to_check, bool *is_sorted) + const matrix::Csr* to_check, bool* is_sorted) { Array is_sorted_device_array{exec, {true}}; const auto num_rows = to_check->get_size()[0]; const auto row_ptrs = 
to_check->get_const_row_ptrs(); const auto cols = to_check->get_const_col_idxs(); auto is_sorted_device = is_sorted_device_array.get_data(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; @@ -2641,8 +2641,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Diagonal *diag) + const matrix::Csr* orig, + matrix::Diagonal* diag) { const auto nnz = orig->get_num_stored_elements(); const auto diag_size = diag->get_size()[0]; diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 5fefd83f4c7..0c89530d1d2 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -87,9 +87,9 @@ namespace kernel { template void compute_partial_reduce( - size_type num_rows, OutType *__restrict__ work, CallableGetValue get_value, + size_type num_rows, OutType* __restrict__ work, CallableGetValue get_value, CallableReduce reduce_op, sycl::nd_item<3> item_ct1, - UninitializedArray(cfg)> &tmp_work) + UninitializedArray(cfg)>& tmp_work) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); constexpr auto sg_size = KCFG_1D::decode<1>(cfg); @@ -101,7 +101,7 @@ void compute_partial_reduce( const auto global_id = thread::get_thread_id(item_ct1); - OutType *tmp_work_array = tmp_work; + OutType* tmp_work_array = tmp_work; auto tmp = zero(); for (auto i = global_id; i < num_rows; i += wg_size * num_blocks) { tmp = reduce_op(tmp, get_value(i)); @@ -121,10 +121,10 @@ void compute_partial_reduce( template void finalize_reduce_computation( - size_type size, const ValueType *work, ValueType *result, + size_type size, const ValueType* work, ValueType* result, CallableReduce reduce_op, CallableFinalize finalize_op, sycl::nd_item<3> item_ct1, - 
UninitializedArray(cfg)> &tmp_work) + UninitializedArray(cfg)>& tmp_work) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); constexpr auto sg_size = KCFG_1D::decode<1>(cfg); @@ -135,7 +135,7 @@ void finalize_reduce_computation( for (auto i = local_id; i < size; i += wg_size) { tmp = reduce_op(tmp, work[i]); } - ValueType *tmp_work_array = tmp_work; + ValueType* tmp_work_array = tmp_work; tmp_work_array[local_id] = tmp; ::gko::kernels::dpcpp::reduce(group::this_thread_block(item_ct1), @@ -149,29 +149,29 @@ void finalize_reduce_computation( template void compute_partial_dot( - size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ work, sycl::nd_item<3> item_ct1, - UninitializedArray(cfg)> &tmp_work) + size_type num_rows, const ValueType* __restrict__ x, size_type stride_x, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ work, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)>& tmp_work) { compute_partial_reduce( num_rows, work, [x, stride_x, y, stride_y](size_type i) { return x[i * stride_x] * y[i * stride_y]; }, - [](const ValueType &x, const ValueType &y) { return x + y; }, item_ct1, + [](const ValueType& x, const ValueType& y) { return x + y; }, item_ct1, tmp_work); } template void compute_partial_dot(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, - const ValueType *x, size_type stride_x, - const ValueType *y, size_type stride_y, - ValueType *work) + sycl::queue* queue, size_type num_rows, + const ValueType* x, size_type stride_x, + const ValueType* y, size_type stride_y, + ValueType* work) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access::mode::read_write, sycl::access::target::local> @@ -194,30 +194,30 @@ 
GKO_ENABLE_DEFAULT_CONFIG_CALL(compute_partial_dot_call, compute_partial_dot, template void compute_partial_conj_dot( - size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ work, sycl::nd_item<3> item_ct1, - UninitializedArray(cfg)> &tmp_work) + size_type num_rows, const ValueType* __restrict__ x, size_type stride_x, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ work, sycl::nd_item<3> item_ct1, + UninitializedArray(cfg)>& tmp_work) { compute_partial_reduce( num_rows, work, [x, stride_x, y, stride_y](size_type i) { return conj(x[i * stride_x]) * y[i * stride_y]; }, - [](const ValueType &x, const ValueType &y) { return x + y; }, item_ct1, + [](const ValueType& x, const ValueType& y) { return x + y; }, item_ct1, tmp_work); } template void compute_partial_conj_dot(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, - const ValueType *x, size_type stride_x, - const ValueType *y, size_type stride_y, - ValueType *work) + sycl::queue* queue, size_type num_rows, + const ValueType* x, size_type stride_x, + const ValueType* y, size_type stride_y, + ValueType* work) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access::mode::read_write, sycl::access::target::local> @@ -240,24 +240,24 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(compute_partial_conj_dot_call, template void finalize_sum_reduce_computation( - size_type size, const ValueType *work, ValueType *result, + size_type size, const ValueType* work, ValueType* result, sycl::nd_item<3> item_ct1, - UninitializedArray(cfg)> &tmp_work) + UninitializedArray(cfg)>& tmp_work) { finalize_reduce_computation( size, work, result, - [](const ValueType &x, const ValueType &y) { return x + y; }, - [](const ValueType &x) { return x; }, item_ct1, tmp_work); 
+ [](const ValueType& x, const ValueType& y) { return x + y; }, + [](const ValueType& x) { return x; }, item_ct1, tmp_work); } template void finalize_sum_reduce_computation(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type size, - const ValueType *work, ValueType *result) + sycl::queue* queue, size_type size, + const ValueType* work, ValueType* result) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access::mode::read_write, sycl::access::target::local> @@ -280,27 +280,27 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_sum_reduce_computation_call, template void compute_partial_norm2( - size_type num_rows, const ValueType *__restrict__ x, size_type stride_x, - remove_complex *__restrict__ work, sycl::nd_item<3> item_ct1, - UninitializedArray, KCFG_1D::decode<0>(cfg)> - &tmp_work) + size_type num_rows, const ValueType* __restrict__ x, size_type stride_x, + remove_complex* __restrict__ work, sycl::nd_item<3> item_ct1, + UninitializedArray, KCFG_1D::decode<0>(cfg)>& + tmp_work) { using norm_type = remove_complex; compute_partial_reduce( num_rows, work, [x, stride_x](size_type i) { return squared_norm(x[i * stride_x]); }, - [](const norm_type &x, const norm_type &y) { return x + y; }, item_ct1, + [](const norm_type& x, const norm_type& y) { return x + y; }, item_ct1, tmp_work); } template void compute_partial_norm2(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue *queue, - size_type num_rows, const ValueType *x, - size_type stride_x, remove_complex *work) + size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, const ValueType* x, + size_type stride_x, remove_complex* work) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, wg_size>, 0, sycl::access::mode::read_write, 
sycl::access::target::local> @@ -323,24 +323,24 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(compute_partial_norm2_call, template void finalize_sqrt_reduce_computation( - size_type size, const ValueType *work, ValueType *result, + size_type size, const ValueType* work, ValueType* result, sycl::nd_item<3> item_ct1, - UninitializedArray(cfg)> &tmp_work) + UninitializedArray(cfg)>& tmp_work) { finalize_reduce_computation( size, work, result, - [](const ValueType &x, const ValueType &y) { return x + y; }, - [](const ValueType &x) { return std::sqrt(x); }, item_ct1, tmp_work); + [](const ValueType& x, const ValueType& y) { return x + y; }, + [](const ValueType& x) { return std::sqrt(x); }, item_ct1, tmp_work); } template void finalize_sqrt_reduce_computation(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type size, - const ValueType *work, ValueType *result) + sycl::queue* queue, size_type size, + const ValueType* work, ValueType* result) { constexpr auto wg_size = KCFG_1D::decode<0>(cfg); - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access::mode::read_write, sycl::access::target::local> @@ -364,11 +364,11 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_sqrt_reduce_computation_call, template void fill_in_coo(size_type num_rows, size_type num_cols, size_type stride, - const size_type *__restrict__ row_ptrs, - const ValueType *__restrict__ source, - IndexType *__restrict__ row_idxs, - IndexType *__restrict__ col_idxs, - ValueType *__restrict__ values, sycl::nd_item<3> item_ct1) + const size_type* __restrict__ row_ptrs, + const ValueType* __restrict__ source, + IndexType* __restrict__ row_idxs, + IndexType* __restrict__ col_idxs, + ValueType* __restrict__ values, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num_rows) { @@ -390,8 +390,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_coo, fill_in_coo) template void count_nnz_per_row(size_type num_rows, 
size_type num_cols, size_type stride, - const ValueType *__restrict__ work, - IndexType *__restrict__ result, + const ValueType* __restrict__ work, + IndexType* __restrict__ result, sycl::nd_item<3> item_ct1) { constexpr auto sg_size = KCFG_1D::decode<1>(cfg); @@ -408,7 +408,7 @@ void count_nnz_per_row(size_type num_rows, size_type num_cols, size_type stride, } result[row_idx] = ::gko::kernels::dpcpp::reduce( warp_tile, part_result, - [](const size_type &a, const size_type &b) { return a + b; }); + [](const size_type& a, const size_type& b) { return a + b; }); } } @@ -420,10 +420,10 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(count_nnz_per_row_call, count_nnz_per_row, template void fill_in_csr(size_type num_rows, size_type num_cols, size_type stride, - const ValueType *__restrict__ source, - IndexType *__restrict__ row_ptrs, - IndexType *__restrict__ col_idxs, - ValueType *__restrict__ values, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ source, + IndexType* __restrict__ row_ptrs, + IndexType* __restrict__ col_idxs, + ValueType* __restrict__ values, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -444,10 +444,10 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr) template void fill_in_ell(size_type num_rows, size_type num_cols, - size_type source_stride, const ValueType *__restrict__ source, + size_type source_stride, const ValueType* __restrict__ source, size_type max_nnz_per_row, size_type result_stride, - IndexType *__restrict__ col_ptrs, - ValueType *__restrict__ values, sycl::nd_item<3> item_ct1) + IndexType* __restrict__ col_ptrs, + ValueType* __restrict__ values, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num_rows) { @@ -478,9 +478,9 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_ell, fill_in_ell) template void calculate_slice_lengths(size_type num_rows, size_type slice_size, int slice_num, size_type stride_factor, - const size_type *__restrict__ nnz_per_row, - size_type 
*__restrict__ slice_lengths, - size_type *__restrict__ slice_sets, + const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ slice_lengths, + size_type* __restrict__ slice_sets, sycl::nd_item<3> item_ct1) { constexpr auto sg_size = cfg; @@ -499,7 +499,7 @@ void calculate_slice_lengths(size_type num_rows, size_type slice_size, group::tiled_partition(group::this_thread_block(item_ct1)); auto warp_result = ::gko::kernels::dpcpp::reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0 && runable) { auto slice_length = ceildiv(warp_result, stride_factor) * stride_factor; @@ -517,11 +517,11 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(calculate_slice_lengths_call, template void fill_in_sellp(size_type num_rows, size_type num_cols, size_type slice_size, - size_type stride, const ValueType *__restrict__ source, - size_type *__restrict__ slice_lengths, - size_type *__restrict__ slice_sets, - IndexType *__restrict__ col_idxs, - ValueType *__restrict__ vals, sycl::nd_item<3> item_ct1) + size_type stride, const ValueType* __restrict__ source, + size_type* __restrict__ slice_lengths, + size_type* __restrict__ slice_sets, + IndexType* __restrict__ col_idxs, + ValueType* __restrict__ vals, sycl::nd_item<3> item_ct1) { const auto global_row = thread::get_thread_id_flat(item_ct1); const auto row = global_row % slice_size; @@ -552,16 +552,16 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_sellp, fill_in_sellp) template -void reduce_max_nnz(size_type size, const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ result, sycl::nd_item<3> item_ct1, - uint8_t *dpct_local) +void reduce_max_nnz(size_type size, const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ result, sycl::nd_item<3> item_ct1, + uint8_t* dpct_local) { constexpr auto sg_size = KCFG_1D::decode<1>(cfg); - auto block_max = (size_type *)dpct_local; + auto block_max = 
(size_type*)dpct_local; reduce_array( size, nnz_per_row, block_max, item_ct1, - [](const size_type &x, const size_type &y) { return max(x, y); }); + [](const size_type& x, const size_type& y) { return max(x, y); }); if (item_ct1.get_local_id(2) == 0) { result[item_ct1.get_group(2)] = block_max[0]; @@ -570,10 +570,10 @@ void reduce_max_nnz(size_type size, const size_type *__restrict__ nnz_per_row, template void reduce_max_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type size, - const size_type *nnz_per_row, size_type *result) + sycl::queue* queue, size_type size, + const size_type* nnz_per_row, size_type* result) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor dpct_local_acc_ct1(sycl::range<1>(dynamic_shared_memory), cgh); @@ -595,8 +595,8 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_max_nnz_call, reduce_max_nnz, template void reduce_max_nnz_per_slice(size_type num_rows, size_type slice_size, size_type stride_factor, - const size_type *__restrict__ nnz_per_row, - size_type *__restrict__ result, + const size_type* __restrict__ nnz_per_row, + size_type* __restrict__ result, sycl::nd_item<3> item_ct1) { constexpr auto sg_size = KCFG_1D::decode<1>(cfg); @@ -616,7 +616,7 @@ void reduce_max_nnz_per_slice(size_type num_rows, size_type slice_size, auto warp_result = ::gko::kernels::dpcpp::reduce( warp_tile, thread_result, - [](const size_type &a, const size_type &b) { return max(a, b); }); + [](const size_type& a, const size_type& b) { return max(a, b); }); if (tid_in_warp == 0 && warpid < slice_num) { result[warpid] = ceildiv(warp_result, stride_factor) * stride_factor; @@ -633,15 +633,15 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_max_nnz_per_slice_call, template void reduce_total_cols(size_type num_slices, - const size_type *__restrict__ max_nnz_per_slice, - size_type *__restrict__ result, - sycl::nd_item<3> item_ct1, uint8_t *dpct_local) + const size_type* __restrict__ 
max_nnz_per_slice, + size_type* __restrict__ result, + sycl::nd_item<3> item_ct1, uint8_t* dpct_local) { - auto block_result = (size_type *)dpct_local; + auto block_result = (size_type*)dpct_local; constexpr auto sg_size = KCFG_1D::decode<1>(cfg); reduce_array( num_slices, max_nnz_per_slice, block_result, item_ct1, - [](const size_type &x, const size_type &y) { return x + y; }); + [](const size_type& x, const size_type& y) { return x + y; }); if (item_ct1.get_local_id(2) == 0) { result[item_ct1.get_group(2)] = block_result[0]; @@ -650,10 +650,10 @@ void reduce_total_cols(size_type num_slices, template void reduce_total_cols(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_slices, - const size_type *max_nnz_per_slice, size_type *result) + sycl::queue* queue, size_type num_slices, + const size_type* max_nnz_per_slice, size_type* result) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor dpct_local_acc_ct1(sycl::range<1>(dynamic_shared_memory), cgh); @@ -674,10 +674,10 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_total_cols_call, reduce_total_cols, template void transpose(const size_type nrows, const size_type ncols, - const ValueType *__restrict__ in, const size_type in_stride, - ValueType *__restrict__ out, const size_type out_stride, + const ValueType* __restrict__ in, const size_type in_stride, + ValueType* __restrict__ out, const size_type out_stride, Closure op, sycl::nd_item<3> item_ct1, - UninitializedArray &space) + UninitializedArray& space) { auto local_x = item_ct1.get_local_id(2); auto local_y = item_ct1.get_local_id(1); @@ -698,10 +698,10 @@ void transpose(const size_type nrows, const size_type ncols, template __WG_BOUND__(sg_size, sg_size) void transpose(const size_type nrows, const size_type ncols, - const ValueType *__restrict__ in, const size_type in_stride, - ValueType *__restrict__ out, const size_type out_stride, + const ValueType* __restrict__ in, const 
size_type in_stride, + ValueType* __restrict__ out, const size_type out_stride, sycl::nd_item<3> item_ct1, - UninitializedArray &space) + UninitializedArray& space) { transpose( nrows, ncols, in, in_stride, out, out_stride, @@ -710,12 +710,12 @@ void transpose(const size_type nrows, const size_type ncols, template void transpose(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type nrows, const size_type ncols, - const ValueType *in, const size_type in_stride, ValueType *out, + sycl::queue* queue, const size_type nrows, const size_type ncols, + const ValueType* in, const size_type in_stride, ValueType* out, const size_type out_stride) { - queue->submit([&](sycl::handler &cgh) { - sycl::accessor, 0, + queue->submit([&](sycl::handler& cgh) { + sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> space_acc_ct1(cgh); @@ -734,12 +734,11 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(transpose_call, transpose, subgroup_list); template __WG_BOUND__(sg_size, sg_size) -void conj_transpose( - const size_type nrows, const size_type ncols, - const ValueType *__restrict__ in, const size_type in_stride, - ValueType *__restrict__ out, const size_type out_stride, - sycl::nd_item<3> item_ct1, - UninitializedArray &space) +void conj_transpose(const size_type nrows, const size_type ncols, + const ValueType* __restrict__ in, const size_type in_stride, + ValueType* __restrict__ out, const size_type out_stride, + sycl::nd_item<3> item_ct1, + UninitializedArray& space) { transpose( nrows, ncols, in, in_stride, out, out_stride, @@ -748,13 +747,13 @@ void conj_transpose( template void conj_transpose(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type nrows, - const size_type ncols, const ValueType *in, - const size_type in_stride, ValueType *out, + sycl::queue* queue, const size_type nrows, + const size_type ncols, const ValueType* in, + const size_type in_stride, ValueType* out, const size_type 
out_stride) { - queue->submit([&](sycl::handler &cgh) { - sycl::accessor, 0, + queue->submit([&](sycl::handler& cgh) { + sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> space_acc_ct1(cgh); @@ -779,9 +778,9 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(conj_transpose_call, conj_transpose, template void simple_apply(std::shared_ptr exec, - const matrix::Dense *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* a, + const matrix::Dense* b, + matrix::Dense* c) { using namespace oneapi::mkl; oneapi::mkl::blas::row_major::gemm( @@ -796,9 +795,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *a, const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Dense* a, const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* c) { using namespace oneapi::mkl; oneapi::mkl::blas::row_major::gemm( @@ -815,9 +814,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { if (x->get_size()[1] == 1) { // TODO: write a custom kernel which does this more efficiently @@ -860,9 +859,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { if (x->get_size()[1] == 1) { // TODO: write a custom kernel which does this more efficiently @@ -908,8 +907,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::Dense *x, - 
matrix::Dense> *result) + const matrix::Dense* x, + matrix::Dense>* result) { if (x->get_size()[1] == 1) { oneapi::mkl::blas::row_major::nrm2( @@ -953,8 +952,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Coo *result) + const matrix::Dense* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -992,8 +991,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Csr *result) + const matrix::Dense* source, + matrix::Csr* result) { auto queue = exec->get_queue(); constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); @@ -1036,8 +1035,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Ell *result) + const matrix::Dense* source, + matrix::Ell* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -1070,8 +1069,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Hybrid *result) + const matrix::Dense* source, + matrix::Hybrid* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1080,8 +1079,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Sellp *result) + const matrix::Dense* source, + matrix::Sellp* result) { auto queue = exec->get_queue(); constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); @@ -1139,8 +1138,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::SparsityCsr *result) + const matrix::Dense* source, + matrix::SparsityCsr* result) 
GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -1149,7 +1148,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Dense *source, size_type *result) + const matrix::Dense* source, size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ -1164,8 +1163,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result) + const matrix::Dense* source, + size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ -1203,8 +1202,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense *source, - Array *result) + const matrix::Dense* source, + Array* result) { auto queue = exec->get_queue(); constexpr auto kcfg_1d_array = as_array(kcfg_1d_list); @@ -1233,8 +1232,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result, size_type stride_factor, + const matrix::Dense* source, + size_type* result, size_type stride_factor, size_type slice_size) { const auto num_rows = source->get_size()[0]; @@ -1292,8 +1291,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { auto size = orig->get_size(); auto sg_array = syn::as_array(subgroup_list); @@ -1314,8 +1313,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { auto size = orig->get_size(); auto sg_array = syn::as_array(subgroup_list); 
diff --git a/dpcpp/matrix/diagonal_kernels.dp.cpp b/dpcpp/matrix/diagonal_kernels.dp.cpp index 1aae9a393a7..98566a50997 100644 --- a/dpcpp/matrix/diagonal_kernels.dp.cpp +++ b/dpcpp/matrix/diagonal_kernels.dp.cpp @@ -65,9 +65,9 @@ namespace kernel { template -void apply_to_csr(size_type num_rows, const ValueType *__restrict__ diag, - const IndexType *__restrict__ row_ptrs, - ValueType *__restrict__ result_values, +void apply_to_csr(size_type num_rows, const ValueType* __restrict__ diag, + const IndexType* __restrict__ row_ptrs, + ValueType* __restrict__ result_values, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -96,9 +96,9 @@ GKO_ENABLE_DEFAULT_HOST(apply_to_csr, apply_to_csr); template void apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { const auto num_rows = b->get_size()[0]; const auto diag_values = a->get_const_values(); diff --git a/dpcpp/matrix/ell_kernels.dp.cpp b/dpcpp/matrix/ell_kernels.dp.cpp index 7525c85eae9..4bf8c7a688b 100644 --- a/dpcpp/matrix/ell_kernels.dp.cpp +++ b/dpcpp/matrix/ell_kernels.dp.cpp @@ -116,12 +116,12 @@ template void spmv_kernel( const size_type num_rows, const int num_worker_per_row, - acc::range val, const IndexType *__restrict__ col, + acc::range val, const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, - acc::range b, OutputValueType *__restrict__ c, + acc::range b, OutputValueType* __restrict__ c, const size_type c_stride, Closure op, sycl::nd_item<3> item_ct1, UninitializedArray &storage) + default_block_size / num_thread_per_worker>& storage) { const auto tidx = thread::get_thread_id_flat(item_ct1); const decltype(tidx) column_id = item_ct1.get_group(1); @@ -189,30 +189,30 @@ template void spmv( const size_type num_rows, const int num_worker_per_row, - acc::range val, const IndexType *__restrict__ col, + 
acc::range val, const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, - acc::range b, OutputValueType *__restrict__ c, + acc::range b, OutputValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1, UninitializedArray &storage) + default_block_size / num_thread_per_worker>& storage) { spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [](const OutputValueType &x, const OutputValueType &y) { return x; }, + [](const OutputValueType& x, const OutputValueType& y) { return x; }, item_ct1, storage); } template void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type num_rows, + sycl::queue* queue, const size_type num_rows, const int num_worker_per_row, acc::range val, - const IndexType *col, const size_type stride, + const IndexType* col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, - OutputValueType *c, const size_type c_stride) + OutputValueType* c, const size_type c_stride) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor< UninitializedArray, @@ -235,12 +235,12 @@ template alpha, acc::range val, - const IndexType *__restrict__ col, const size_type stride, + const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, - const OutputValueType *__restrict__ beta, OutputValueType *__restrict__ c, + const OutputValueType* __restrict__ beta, OutputValueType* __restrict__ c, const size_type c_stride, sycl::nd_item<3> item_ct1, UninitializedArray &storage) + default_block_size / num_thread_per_worker>& storage) { const OutputValueType alpha_val = alpha(0); const OutputValueType beta_val = beta[0]; @@ -253,7 +253,7 @@ void spmv( spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val](const 
OutputValueType &x, const OutputValueType &y) { + [&alpha_val](const OutputValueType& x, const OutputValueType& y) { return alpha_val * x; }, item_ct1, storage); @@ -261,8 +261,8 @@ void spmv( spmv_kernel( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, - [&alpha_val, &beta_val](const OutputValueType &x, - const OutputValueType &y) { + [&alpha_val, &beta_val](const OutputValueType& x, + const OutputValueType& y) { return alpha_val * x + beta_val * y; }, item_ct1, storage); @@ -272,14 +272,14 @@ void spmv( template void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type num_rows, + sycl::queue* queue, const size_type num_rows, const int num_worker_per_row, acc::range alpha, - acc::range val, const IndexType *col, + acc::range val, const IndexType* col, const size_type stride, const size_type num_stored_elements_per_row, - acc::range b, const OutputValueType *beta, - OutputValueType *c, const size_type c_stride) + acc::range b, const OutputValueType* beta, + OutputValueType* c, const size_type c_stride) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor< UninitializedArray, @@ -302,7 +302,7 @@ void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void initialize_zero_dense(size_type num_rows, size_type num_cols, - size_type stride, ValueType *__restrict__ result, + size_type stride, ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto tidx_x = @@ -321,9 +321,9 @@ GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); template void fill_in_dense(size_type num_rows, size_type nnz, size_type source_stride, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, - size_type result_stride, ValueType *__restrict__ result, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, + size_type result_stride, ValueType* __restrict__ 
result, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -341,8 +341,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); template void count_nnz_per_row(size_type num_rows, size_type max_nnz_per_row, - size_type stride, const ValueType *__restrict__ values, - IndexType *__restrict__ result, + size_type stride, const ValueType* __restrict__ values, + IndexType* __restrict__ result, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -360,17 +360,17 @@ void count_nnz_per_row(size_type num_rows, size_type max_nnz_per_row, } result[row_idx] = ::gko::kernels::dpcpp::reduce( warp_tile, part_result, - [](const size_type &a, const size_type &b) { return a + b; }); + [](const size_type& a, const size_type& b) { return a + b; }); } } template void count_nnz_per_row(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, + sycl::queue* queue, size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *values, IndexType *result) + const ValueType* values, IndexType* result) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { count_nnz_per_row(num_rows, max_nnz_per_row, stride, values, @@ -379,10 +379,9 @@ void count_nnz_per_row(dim3 grid, dim3 block, size_type dynamic_shared_memory, }); } -#define GKO_ELL_COUNT_NNZ_PER_ROW(ValueType, IndexType) \ - void count_nnz_per_row(dim3, dim3, size_type, sycl::queue *, size_type, \ - size_type, size_type, const ValueType *, \ - IndexType *) +#define GKO_ELL_COUNT_NNZ_PER_ROW(ValueType, IndexType) \ + void count_nnz_per_row(dim3, dim3, size_type, sycl::queue*, size_type, \ + size_type, size_type, const ValueType*, IndexType*) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_ELL_COUNT_NNZ_PER_ROW); @@ -391,11 +390,11 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_ELL_COUNT_NNZ_PER_ROW); template void fill_in_csr(size_type num_rows, size_type max_nnz_per_row, - size_type stride, const ValueType *__restrict__ source_values, - const IndexType *__restrict__ source_col_idxs, - IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values, + size_type stride, const ValueType* __restrict__ source_values, + const IndexType* __restrict__ source_col_idxs, + IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -419,9 +418,9 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr); template void extract_diagonal(size_type diag_size, size_type max_nnz_per_row, size_type orig_stride, - const ValueType *__restrict__ orig_values, - const IndexType *__restrict__ orig_col_idxs, - ValueType *__restrict__ diag, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ orig_values, + const IndexType* __restrict__ orig_col_idxs, + ValueType* __restrict__ diag, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); const auto row = tidx % diag_size; @@ -446,7 +445,7 @@ namespace { template GKO_INLINE auto as_dpcpp_accessor( - const acc::range> &acc) + const acc::range>& acc) { return acc::range>( acc.get_accessor().get_size(), acc.get_accessor().get_stored_data(), @@ -459,11 +458,11 @@ template , std::shared_ptr exec, int num_worker_per_row, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { using a_accessor = gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; @@ -516,7 +515,7 @@ 
GKO_ENABLE_IMPLEMENTATION_SELECTION(select_abstract_spmv, abstract_spmv); template std::array compute_thread_worker_and_atomicity( std::shared_ptr exec, - const matrix::Ell *a) + const matrix::Ell* a) { int num_thread_per_worker = 8; int atomic = 0; @@ -559,9 +558,9 @@ std::array compute_thread_worker_and_atomicity( template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -593,11 +592,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Ell* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -626,8 +625,8 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Dense *result) + const matrix::Ell* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -657,8 +656,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Csr *result) + const matrix::Ell* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -693,8 +692,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Ell *source, - size_type *result) + const matrix::Ell* source, + size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = 
Array(exec, num_rows); @@ -710,8 +709,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Ell *source, - Array *result) + const matrix::Ell* source, + Array* result) { const auto num_rows = source->get_size()[0]; const auto max_nnz_per_row = source->get_num_stored_elements_per_row(); @@ -732,8 +731,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Ell *orig, - matrix::Diagonal *diag) + const matrix::Ell* orig, + matrix::Diagonal* diag) { const auto max_nnz_per_row = orig->get_num_stored_elements_per_row(); const auto orig_stride = orig->get_stride(); diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index bc61e5c6985..4d9072eac5f 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -59,20 +59,20 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* a, + const matrix::Dense* b, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* alpha, + const matrix::Fbcsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -80,14 +80,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) GKO_NOT_IMPLEMENTED; + const IndexType* ptrs, size_type num_rows, + IndexType* idxs) GKO_NOT_IMPLEMENTED; template 
void convert_to_dense(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -95,8 +95,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(const std::shared_ptr exec, - const matrix::Fbcsr *const source, - matrix::Csr *const result) + const matrix::Fbcsr* const source, + matrix::Csr* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -105,8 +105,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* orig, + matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -114,8 +114,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) + const matrix::Fbcsr* orig, + matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -125,8 +125,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + size_type* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -135,8 +135,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - Array *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + Array* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ 
-145,8 +145,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* to_check, + bool* is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); @@ -154,7 +154,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(const std::shared_ptr exec, - matrix::Fbcsr *const to_sort) + matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -163,8 +163,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* orig, + matrix::Diagonal* diag) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/dpcpp/matrix/hybrid_kernels.dp.cpp b/dpcpp/matrix/hybrid_kernels.dp.cpp index 13fa02a3331..79f272f8b69 100644 --- a/dpcpp/matrix/hybrid_kernels.dp.cpp +++ b/dpcpp/matrix/hybrid_kernels.dp.cpp @@ -87,9 +87,9 @@ namespace kernel { template void count_coo_row_nnz(const size_type nnz, const size_type num_lines, - const ValueType *__restrict__ val, - const IndexType *__restrict__ row, - IndexType *__restrict__ nnz_per_row, + const ValueType* __restrict__ val, + const IndexType* __restrict__ row, + IndexType* __restrict__ nnz_per_row, sycl::nd_item<3> item_ct1) { IndexType temp_val = 0; @@ -138,11 +138,11 @@ void count_coo_row_nnz(const size_type nnz, const size_type num_lines, template void count_coo_row_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, const size_type nnz, - const size_type num_lines, const ValueType *val, - const IndexType *row, IndexType *nnz_per_row) + sycl::queue* queue, const size_type nnz, + const size_type num_lines, const ValueType* 
val, + const IndexType* row, IndexType* nnz_per_row) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { count_coo_row_nnz(nnz, num_lines, val, row, @@ -154,14 +154,14 @@ void count_coo_row_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void fill_in_csr(size_type num_rows, size_type max_nnz_per_row, - size_type stride, const ValueType *__restrict__ ell_val, - const IndexType *__restrict__ ell_col, - const ValueType *__restrict__ coo_val, - const IndexType *__restrict__ coo_col, - const IndexType *__restrict__ coo_offset, - IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values, + size_type stride, const ValueType* __restrict__ ell_val, + const IndexType* __restrict__ ell_col, + const ValueType* __restrict__ coo_val, + const IndexType* __restrict__ coo_col, + const IndexType* __restrict__ coo_offset, + IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -190,8 +190,8 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr); template -void add(size_type num, ValueType1 *__restrict__ val1, - const ValueType2 *__restrict__ val2, sycl::nd_item<3> item_ct1) +void add(size_type num, ValueType1* __restrict__ val1, + const ValueType2* __restrict__ val2, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num) { @@ -207,8 +207,8 @@ GKO_ENABLE_DEFAULT_HOST(add, add); template void convert_to_dense(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Hybrid* source, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_HYBRID_CONVERT_TO_DENSE_KERNEL); @@ -216,8 +216,8 
@@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Csr *result) + const matrix::Hybrid* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; auto coo_offset = Array(exec, num_rows + 1); @@ -281,8 +281,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Hybrid *source, - size_type *result) + const matrix::Hybrid* source, + size_type* result) { size_type ell_nnz = 0; size_type coo_nnz = 0; diff --git a/dpcpp/matrix/sellp_kernels.dp.cpp b/dpcpp/matrix/sellp_kernels.dp.cpp index cd20591f48d..dffe586c7bf 100644 --- a/dpcpp/matrix/sellp_kernels.dp.cpp +++ b/dpcpp/matrix/sellp_kernels.dp.cpp @@ -72,11 +72,11 @@ namespace { template void spmv_kernel(size_type num_rows, size_type num_right_hand_sides, size_type b_stride, size_type c_stride, - const size_type *__restrict__ slice_lengths, - const size_type *__restrict__ slice_sets, - const ValueType *__restrict__ a, - const IndexType *__restrict__ col, - const ValueType *__restrict__ b, ValueType *__restrict__ c, + const size_type* __restrict__ slice_lengths, + const size_type* __restrict__ slice_sets, + const ValueType* __restrict__ a, + const IndexType* __restrict__ col, + const ValueType* __restrict__ b, ValueType* __restrict__ c, sycl::nd_item<3> item_ct1) { const auto slice_id = item_ct1.get_group(2); @@ -102,14 +102,14 @@ GKO_ENABLE_DEFAULT_HOST(spmv_kernel, spmv_kernel); template void advanced_spmv_kernel(size_type num_rows, size_type num_right_hand_sides, size_type b_stride, size_type c_stride, - const size_type *__restrict__ slice_lengths, - const size_type *__restrict__ slice_sets, - const ValueType *__restrict__ alpha, - const ValueType *__restrict__ a, - const IndexType *__restrict__ col, - const ValueType *__restrict__ b, - const ValueType *__restrict__ beta, - ValueType *__restrict__ c, sycl::nd_item<3> item_ct1) + const size_type* 
__restrict__ slice_lengths, + const size_type* __restrict__ slice_sets, + const ValueType* __restrict__ alpha, + const ValueType* __restrict__ a, + const IndexType* __restrict__ col, + const ValueType* __restrict__ b, + const ValueType* __restrict__ beta, + ValueType* __restrict__ c, sycl::nd_item<3> item_ct1) { const auto slice_id = item_ct1.get_group(2); const auto slice_size = item_ct1.get_local_range().get(2); @@ -140,7 +140,7 @@ namespace kernel { template void initialize_zero_dense(size_type num_rows, size_type num_cols, - size_type stride, ValueType *__restrict__ result, + size_type stride, ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto tidx_x = @@ -160,11 +160,11 @@ GKO_ENABLE_DEFAULT_HOST(initialize_zero_dense, initialize_zero_dense); template void fill_in_dense(size_type num_rows, size_type num_cols, size_type stride, size_type slice_size, - const size_type *__restrict__ slice_lengths, - const size_type *__restrict__ slice_sets, - const IndexType *__restrict__ col_idxs, - const ValueType *__restrict__ values, - ValueType *__restrict__ result, sycl::nd_item<3> item_ct1) + const size_type* __restrict__ slice_lengths, + const size_type* __restrict__ slice_sets, + const IndexType* __restrict__ col_idxs, + const ValueType* __restrict__ values, + ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) { const auto global_row = thread::get_subwarp_id_flat(item_ct1); @@ -187,13 +187,13 @@ void fill_in_dense(size_type num_rows, size_type num_cols, size_type stride, template void fill_in_dense(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, size_type num_cols, + sycl::queue* queue, size_type num_rows, size_type num_cols, size_type stride, size_type slice_size, - const size_type *slice_lengths, const size_type *slice_sets, - const IndexType *col_idxs, const ValueType *values, - ValueType *result) + const size_type* slice_lengths, const size_type* slice_sets, + const IndexType* col_idxs, 
const ValueType* values, + ValueType* result) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { fill_in_dense( @@ -206,9 +206,9 @@ void fill_in_dense(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void count_nnz_per_row(size_type num_rows, size_type slice_size, - const size_type *__restrict__ slice_sets, - const ValueType *__restrict__ values, - IndexType *__restrict__ result, + const size_type* __restrict__ slice_sets, + const ValueType* __restrict__ values, + IndexType* __restrict__ result, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; @@ -232,7 +232,7 @@ void count_nnz_per_row(size_type num_rows, size_type slice_size, } result[row_idx] = ::gko::kernels::dpcpp::reduce( warp_tile, part_result, - [](const size_type &a, const size_type &b) { return a + b; }); + [](const size_type& a, const size_type& b) { return a + b; }); } } @@ -241,12 +241,12 @@ GKO_ENABLE_DEFAULT_HOST(count_nnz_per_row, count_nnz_per_row); template void fill_in_csr(size_type num_rows, size_type slice_size, - const size_type *__restrict__ source_slice_sets, - const IndexType *__restrict__ source_col_idxs, - const ValueType *__restrict__ source_values, - IndexType *__restrict__ result_row_ptrs, - IndexType *__restrict__ result_col_idxs, - ValueType *__restrict__ result_values, + const size_type* __restrict__ source_slice_sets, + const IndexType* __restrict__ source_col_idxs, + const ValueType* __restrict__ source_values, + IndexType* __restrict__ result_row_ptrs, + IndexType* __restrict__ result_col_idxs, + ValueType* __restrict__ result_values, sycl::nd_item<3> item_ct1) { const auto row = thread::get_thread_id_flat(item_ct1); @@ -273,10 +273,10 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_csr, fill_in_csr); template void extract_diagonal(size_type diag_size, size_type slice_size, - const size_type *__restrict__ orig_slice_sets, - const ValueType 
*__restrict__ orig_values, - const IndexType *__restrict__ orig_col_idxs, - ValueType *__restrict__ diag, sycl::nd_item<3> item_ct1) + const size_type* __restrict__ orig_slice_sets, + const ValueType* __restrict__ orig_values, + const IndexType* __restrict__ orig_col_idxs, + ValueType* __restrict__ diag, sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; auto warp_tile = @@ -312,8 +312,8 @@ GKO_ENABLE_DEFAULT_HOST(extract_diagonal, extract_diagonal); template void spmv(std::shared_ptr exec, - const matrix::Sellp *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Sellp* a, + const matrix::Dense* b, matrix::Dense* c) { const dim3 blockSize(matrix::default_slice_size); const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), @@ -331,11 +331,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Sellp *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Sellp* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { const dim3 blockSize(matrix::default_slice_size); const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), @@ -356,8 +356,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Dense *result) + const matrix::Sellp* source, + matrix::Dense* result) { const auto num_rows = source->get_size()[0]; const auto num_cols = source->get_size()[1]; @@ -398,8 +398,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Csr *result) + const matrix::Sellp* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; const auto slice_size = source->get_slice_size(); @@ -443,8 +443,8 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Sellp *source, - size_type *result) + const matrix::Sellp* source, + size_type* result) { const auto num_rows = source->get_size()[0]; @@ -474,8 +474,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Sellp *orig, - matrix::Diagonal *diag) + const matrix::Sellp* orig, + matrix::Diagonal* diag) { const auto diag_size = diag->get_size()[0]; const auto slice_size = orig->get_slice_size(); diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 7dc935e83a5..0393c7d6203 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -49,9 +49,9 @@ namespace sparsity_csr { template void spmv(std::shared_ptr exec, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* a, + const matrix::Dense* b, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_SPMV_KERNEL); @@ -59,11 +59,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* alpha, + const matrix::SparsityCsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL); @@ -72,8 +72,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_num_diagonal_elements( std::shared_ptr exec, - const matrix::SparsityCsr *matrix, - size_type *num_diagonal_elements) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* matrix, + size_type* num_diagonal_elements) 
GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_COUNT_NUM_DIAGONAL_ELEMENTS_KERNEL); @@ -81,9 +81,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void remove_diagonal_elements( - std::shared_ptr exec, const IndexType *row_ptrs, - const IndexType *col_idxs, - matrix::SparsityCsr *matrix) GKO_NOT_IMPLEMENTED; + std::shared_ptr exec, const IndexType* row_ptrs, + const IndexType* col_idxs, + matrix::SparsityCsr* matrix) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_REMOVE_DIAGONAL_ELEMENTS_KERNEL); @@ -91,8 +91,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::SparsityCsr *orig, - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, + matrix::SparsityCsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -101,7 +101,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::SparsityCsr *to_sort) + matrix::SparsityCsr* to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -111,8 +111,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* to_check, + bool* is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp index 14baa306bca..193fdf096a1 100644 --- a/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp +++ b/dpcpp/multigrid/amgx_pgm_kernels.dp.cpp @@ -57,23 +57,23 @@ namespace amgx_pgm { template void match_edge(std::shared_ptr exec, - const Array &strongest_neighbor, - Array &agg) GKO_NOT_IMPLEMENTED; + const Array& strongest_neighbor, + Array& agg) 
GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, - IndexType *num_unagg) GKO_NOT_IMPLEMENTED; + const Array& agg, + IndexType* num_unagg) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template -void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) GKO_NOT_IMPLEMENTED; +void renumber(std::shared_ptr exec, Array& agg, + IndexType* num_agg) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); @@ -81,9 +81,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); template void find_strongest_neighbor( std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) GKO_NOT_IMPLEMENTED; + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, Array& agg, + Array& strongest_neighbor) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_FIND_STRONGEST_NEIGHBOR); @@ -92,9 +92,9 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void assign_to_exist_agg( std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &intermediate_agg) GKO_NOT_IMPLEMENTED; + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, Array& agg, + Array& intermediate_agg) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( GKO_DECLARE_AMGX_PGM_ASSIGN_TO_EXIST_AGG); diff --git a/dpcpp/preconditioner/isai_kernels.dp.cpp b/dpcpp/preconditioner/isai_kernels.dp.cpp index 33cbe044fc9..2093d84ed8e 100644 --- a/dpcpp/preconditioner/isai_kernels.dp.cpp +++ b/dpcpp/preconditioner/isai_kernels.dp.cpp @@ -62,24 +62,24 @@ namespace isai { template -void forall_matching(const IndexType *fst, IndexType fst_size, - const IndexType *snd, IndexType 
snd_size, +void forall_matching(const IndexType* fst, IndexType fst_size, + const IndexType* snd, IndexType snd_size, Callback cb) GKO_NOT_IMPLEMENTED; template void generic_generate(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, Callable trs_solve) GKO_NOT_IMPLEMENTED; template void generate_tri_inverse(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, bool lower) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -88,10 +88,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_general_inverse(std::shared_ptr exec, - const matrix::Csr *input, - matrix::Csr *inverse, - IndexType *excess_rhs_ptrs, - IndexType *excess_nz_ptrs, + const matrix::Csr* input, + matrix::Csr* inverse, + IndexType* excess_rhs_ptrs, + IndexType* excess_nz_ptrs, bool spd) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -100,12 +100,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_excess_system(std::shared_ptr, - const matrix::Csr *input, - const matrix::Csr *inverse, - const IndexType *excess_rhs_ptrs, - const IndexType *excess_nz_ptrs, - matrix::Csr *excess_system, - matrix::Dense *excess_rhs, + const matrix::Csr* input, + const matrix::Csr* inverse, + const IndexType* excess_rhs_ptrs, + const IndexType* excess_nz_ptrs, + matrix::Csr* excess_system, + matrix::Dense* excess_rhs, size_type e_start, size_type e_end) GKO_NOT_IMPLEMENTED; @@ -115,8 +115,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - matrix::Dense 
*excess_solution, + const IndexType* excess_block_ptrs, + matrix::Dense* excess_solution, size_type e_start, size_type e_end) GKO_NOT_IMPLEMENTED; @@ -126,9 +126,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scatter_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - const matrix::Dense *excess_solution, - matrix::Csr *inverse, + const IndexType* excess_block_ptrs, + const matrix::Dense* excess_solution, + matrix::Csr* inverse, size_type e_start, size_type e_end) GKO_NOT_IMPLEMENTED; diff --git a/dpcpp/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/preconditioner/jacobi_kernels.dp.cpp index 623e2c121ec..cc22a0974d4 100644 --- a/dpcpp/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/preconditioner/jacobi_kernels.dp.cpp @@ -67,8 +67,8 @@ namespace jacobi { void initialize_precisions(std::shared_ptr exec, - const Array &source, - Array &precisions) + const Array& source, + Array& precisions) GKO_NOT_IMPLEMENTED; @@ -77,20 +77,20 @@ namespace { template inline bool has_same_nonzero_pattern( - const IndexType *prev_row_ptr, const IndexType *curr_row_ptr, - const IndexType *next_row_ptr) GKO_NOT_IMPLEMENTED; + const IndexType* prev_row_ptr, const IndexType* curr_row_ptr, + const IndexType* next_row_ptr) GKO_NOT_IMPLEMENTED; template -size_type find_natural_blocks(const matrix::Csr *mtx, +size_type find_natural_blocks(const matrix::Csr* mtx, uint32 max_block_size, - IndexType *block_ptrs) GKO_NOT_IMPLEMENTED; + IndexType* block_ptrs) GKO_NOT_IMPLEMENTED; template inline size_type agglomerate_supervariables( uint32 max_block_size, size_type num_natural_blocks, - IndexType *block_ptrs) GKO_NOT_IMPLEMENTED; + IndexType* block_ptrs) GKO_NOT_IMPLEMENTED; } // namespace @@ -98,9 +98,9 @@ inline size_type agglomerate_supervariables( template void find_blocks(std::shared_ptr exec, - const matrix::Csr *system_matrix, - uint32 max_block_size, size_type &num_blocks, - Array &block_pointers) GKO_NOT_IMPLEMENTED; + const matrix::Csr* 
system_matrix, + uint32 max_block_size, size_type& num_blocks, + Array& block_pointers) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_FIND_BLOCKS_KERNEL); @@ -110,25 +110,25 @@ namespace { template -inline void extract_block(const matrix::Csr *mtx, +inline void extract_block(const matrix::Csr* mtx, IndexType block_size, IndexType block_start, - ValueType *block, + ValueType* block, size_type stride) GKO_NOT_IMPLEMENTED; template -inline IndexType choose_pivot(IndexType block_size, const ValueType *block, +inline IndexType choose_pivot(IndexType block_size, const ValueType* block, size_type stride) GKO_NOT_IMPLEMENTED; template inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, - ValueType *block, size_type stride) GKO_NOT_IMPLEMENTED; + ValueType* block, size_type stride) GKO_NOT_IMPLEMENTED; template inline bool apply_gauss_jordan_transform(IndexType row, IndexType col, - IndexType block_size, ValueType *block, + IndexType block_size, ValueType* block, size_type stride) GKO_NOT_IMPLEMENTED; @@ -137,8 +137,8 @@ template > inline void transpose_block( - IndexType block_size, const SourceValueType *from, size_type from_stride, - ResultValueType *to, size_type to_stride, + IndexType block_size, const SourceValueType* from, size_type from_stride, + ResultValueType* to, size_type to_stride, ValueConverter converter = {}) noexcept GKO_NOT_IMPLEMENTED; @@ -147,8 +147,8 @@ template > inline void conj_transpose_block( - IndexType block_size, const SourceValueType *from, size_type from_stride, - ResultValueType *to, size_type to_stride, + IndexType block_size, const SourceValueType* from, size_type from_stride, + ResultValueType* to, size_type to_stride, ValueConverter converter = {}) noexcept GKO_NOT_IMPLEMENTED; @@ -157,22 +157,22 @@ template > inline void permute_and_transpose_block( - IndexType block_size, const IndexType *col_perm, - const SourceValueType *source, size_type source_stride, - 
ResultValueType *result, size_type result_stride, + IndexType block_size, const IndexType* col_perm, + const SourceValueType* source, size_type source_stride, + ResultValueType* result, size_type result_stride, ValueConverter converter = {}) GKO_NOT_IMPLEMENTED; template -inline bool invert_block(IndexType block_size, IndexType *perm, - ValueType *block, +inline bool invert_block(IndexType block_size, IndexType* perm, + ValueType* block, size_type stride) GKO_NOT_IMPLEMENTED; template inline bool validate_precision_reduction_feasibility( std::shared_ptr exec, IndexType block_size, - const ValueType *block, size_type stride) GKO_NOT_IMPLEMENTED; + const ValueType* block, size_type stride) GKO_NOT_IMPLEMENTED; } // namespace @@ -180,15 +180,15 @@ inline bool validate_precision_reduction_feasibility( template void generate(std::shared_ptr exec, - const matrix::Csr *system_matrix, + const matrix::Csr* system_matrix, size_type num_blocks, uint32 max_block_size, remove_complex accuracy, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array> &conditioning, - Array &block_precisions, - const Array &block_pointers, - Array &blocks) GKO_NOT_IMPLEMENTED; + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array>& conditioning, + Array& block_precisions, + const Array& block_pointers, + Array& blocks) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_GENERATE_KERNEL); @@ -201,9 +201,9 @@ template < typename ValueType, typename BlockValueType, typename ValueConverter = default_converter> inline void apply_block(size_type block_size, size_type num_rhs, - const BlockValueType *block, size_type stride, - ValueType alpha, const ValueType *b, size_type stride_b, - ValueType beta, ValueType *x, size_type stride_x, + const BlockValueType* block, size_type stride, + ValueType alpha, const ValueType* b, size_type stride_b, + ValueType beta, ValueType* x, size_type stride_x, ValueConverter 
converter = {}) GKO_NOT_IMPLEMENTED; @@ -213,15 +213,15 @@ inline void apply_block(size_type block_size, size_type num_rhs, template void apply(std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, - const Array &blocks, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) GKO_NOT_IMPLEMENTED; + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, + const Array& blocks, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_JACOBI_APPLY_KERNEL); @@ -230,12 +230,12 @@ template void simple_apply( std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const matrix::Dense *b, - matrix::Dense *x) GKO_NOT_IMPLEMENTED; + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const matrix::Dense* b, + matrix::Dense* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_SIMPLE_APPLY_KERNEL); @@ -244,11 +244,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) GKO_NOT_IMPLEMENTED; + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const 
Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_TRANSPOSE_KERNEL); @@ -257,11 +257,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) GKO_NOT_IMPLEMENTED; + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONJ_TRANSPOSE_KERNEL); @@ -270,11 +270,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); diff --git a/dpcpp/reorder/rcm_kernels.dp.cpp b/dpcpp/reorder/rcm_kernels.dp.cpp index baf60231bf5..fa1d550f195 100644 --- a/dpcpp/reorder/rcm_kernels.dp.cpp +++ b/dpcpp/reorder/rcm_kernels.dp.cpp @@ -58,8 +58,8 @@ namespace rcm { template void get_degree_of_nodes(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - IndexType *const degrees) 
GKO_NOT_IMPLEMENTED; + const IndexType* const row_ptrs, + IndexType* const degrees) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); @@ -67,9 +67,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); template void get_permutation( std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, const IndexType *const col_idxs, - const IndexType *const degrees, IndexType *const permutation, - IndexType *const inv_permutation, + const IndexType* const row_ptrs, const IndexType* const col_idxs, + const IndexType* const degrees, IndexType* const permutation, + IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/dpcpp/solver/cb_gmres_kernels.dp.cpp b/dpcpp/solver/cb_gmres_kernels.dp.cpp index 2dc9d8b84b5..3d9aef5a879 100644 --- a/dpcpp/solver/cb_gmres_kernels.dp.cpp +++ b/dpcpp/solver/cb_gmres_kernels.dp.cpp @@ -81,7 +81,7 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; template void zero_matrix_kernel(size_type m, size_type n, size_type stride, - ValueType *__restrict__ array, + ValueType* __restrict__ array, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -101,7 +101,7 @@ GKO_ENABLE_DEFAULT_HOST(zero_matrix_kernel, zero_matrix_kernel); template void initialize_2_1_kernel(size_type num_rows, size_type num_rhs, size_type krylov_dim, Accessor3d krylov_bases, - ValueType *__restrict__ residual_norm_collection, + ValueType* __restrict__ residual_norm_collection, size_type stride_residual_nc, sycl::nd_item<3> item_ct1) { @@ -131,13 +131,13 @@ void initialize_2_1_kernel(size_type num_rows, size_type num_rhs, template void initialize_2_1_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue *queue, + size_type dynamic_shared_memory, sycl::queue* 
queue, size_type num_rows, size_type num_rhs, size_type krylov_dim, Accessor3d krylov_bases, - ValueType *residual_norm_collection, + ValueType* residual_norm_collection, size_type stride_residual_nc) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { initialize_2_1_kernel( @@ -152,11 +152,11 @@ void initialize_2_1_kernel(dim3 grid, dim3 block, template void initialize_2_2_kernel( size_type num_rows, size_type num_rhs, - const ValueType *__restrict__ residual, size_type stride_residual, - const remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ residual_norm_collection, Accessor3d krylov_bases, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, - size_type *__restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ residual, size_type stride_residual, + const remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ residual_norm_collection, Accessor3d krylov_bases, + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, + size_type* __restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto krylov_stride = @@ -180,14 +180,14 @@ void initialize_2_2_kernel( template void initialize_2_2_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, - size_type num_rows, size_type num_rhs, const ValueType *residual, - size_type stride_residual, const remove_complex *residual_norm, - ValueType *residual_norm_collection, Accessor3d krylov_bases, - ValueType *next_krylov_basis, size_type stride_next_krylov, - size_type *final_iter_nums) + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, size_type num_rhs, const ValueType* residual, + size_type stride_residual, const remove_complex* residual_norm, + ValueType* 
residual_norm_collection, Accessor3d krylov_bases, + ValueType* next_krylov_basis, size_type stride_next_krylov, + size_type* final_iter_nums) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { initialize_2_2_kernel( @@ -200,8 +200,8 @@ void initialize_2_2_kernel( void increase_final_iteration_numbers_kernel( - size_type *__restrict__ final_iter_nums, - const stopping_status *__restrict__ stop_status, size_type total_number, + size_type* __restrict__ final_iter_nums, + const stopping_status* __restrict__ stop_status, size_type total_number, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -217,12 +217,12 @@ GKO_ENABLE_DEFAULT_HOST(increase_final_iteration_numbers_kernel, template void multinorm2_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, - size_type stride_next_krylov, remove_complex *__restrict__ norms, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + const ValueType* __restrict__ next_krylov_basis, + size_type stride_next_krylov, remove_complex* __restrict__ norms, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, UninitializedArray, - default_dot_dim *(default_dot_dim + 1)> - *reduction_helper_array) + default_dot_dim*(default_dot_dim + 1)>* + reduction_helper_array) { using rc_vtype = remove_complex; const auto tidx = item_ct1.get_local_id(2); @@ -236,7 +236,7 @@ void multinorm2_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - rc_vtype *__restrict__ reduction_helper = (*reduction_helper_array); + rc_vtype* __restrict__ reduction_helper = (*reduction_helper_array); rc_vtype local_res = zero(); if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { for (size_type i = start_row + tidy; 
i < end_row; @@ -252,7 +252,7 @@ void multinorm2_kernel( group::this_thread_block(item_ct1)); const auto sum = ::gko::kernels::dpcpp::reduce( tile_block, local_res, - [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + [](const rc_vtype& a, const rc_vtype& b) { return a + b; }); const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; if (tidx == 0 && new_col_idx < num_cols && !stop_status[new_col_idx].has_stopped()) { @@ -263,16 +263,16 @@ void multinorm2_kernel( template void multinorm2_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, - size_type num_cols, const ValueType *next_krylov_basis, + sycl::queue* queue, size_type num_rows, + size_type num_cols, const ValueType* next_krylov_basis, size_type stride_next_krylov, - remove_complex *norms, - const stopping_status *stop_status) + remove_complex* norms, + const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor< UninitializedArray, - default_dot_dim *(default_dot_dim + 1)>, + default_dot_dim*(default_dot_dim + 1)>, 0, sycl::access_mode::read_write, sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); @@ -290,12 +290,12 @@ void multinorm2_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void multinorminf_without_stop_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, - size_type stride_next_krylov, remove_complex *__restrict__ norms, + const ValueType* __restrict__ next_krylov_basis, + size_type stride_next_krylov, remove_complex* __restrict__ norms, size_type stride_norms, sycl::nd_item<3> item_ct1, UninitializedArray, - default_dot_dim *(default_dot_dim + 1)> - *reduction_helper_array) + default_dot_dim*(default_dot_dim + 1)>* + reduction_helper_array) { using rc_vtype = remove_complex; const auto tidx = item_ct1.get_local_id(2); @@ -309,7 +309,7 @@ void 
multinorminf_without_stop_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - rc_vtype *__restrict__ reduction_helper = (*reduction_helper_array); + rc_vtype* __restrict__ reduction_helper = (*reduction_helper_array); rc_vtype local_max = zero(); if (col_idx < num_cols) { for (size_type i = start_row + tidy; i < end_row; @@ -327,7 +327,7 @@ void multinorminf_without_stop_kernel( const auto tile_block = group::tiled_partition( group::this_thread_block(item_ct1)); const auto value = ::gko::kernels::dpcpp::reduce( - tile_block, local_max, [](const rc_vtype &a, const rc_vtype &b) { + tile_block, local_max, [](const rc_vtype& a, const rc_vtype& b) { return ((a >= b) ? a : b); }); const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; @@ -339,15 +339,15 @@ void multinorminf_without_stop_kernel( template void multinorminf_without_stop_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, - size_type num_rows, size_type num_cols, const ValueType *next_krylov_basis, - size_type stride_next_krylov, remove_complex *norms, + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, size_type num_cols, const ValueType* next_krylov_basis, + size_type stride_next_krylov, remove_complex* norms, size_type stride_norms) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor< UninitializedArray, - default_dot_dim *(default_dot_dim + 1)>, + default_dot_dim*(default_dot_dim + 1)>, 0, sycl::access_mode::read_write, sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); @@ -366,15 +366,15 @@ void multinorminf_without_stop_kernel( template void multinorm2_inf_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, + const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, - 
remove_complex *__restrict__ norms1, - remove_complex *__restrict__ norms2, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, + remove_complex* __restrict__ norms1, + remove_complex* __restrict__ norms2, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, UninitializedArray, (1 + compute_inf) * - default_dot_dim *(default_dot_dim + 1)> - *reduction_helper_array) + default_dot_dim*(default_dot_dim + 1)>* + reduction_helper_array) { using rc_vtype = remove_complex; const auto tidx = item_ct1.get_local_id(2); @@ -388,9 +388,9 @@ void multinorm2_inf_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - rc_vtype *__restrict__ reduction_helper_add = (*reduction_helper_array); - rc_vtype *__restrict__ reduction_helper_max = - static_cast((*reduction_helper_array)) + + rc_vtype* __restrict__ reduction_helper_add = (*reduction_helper_array); + rc_vtype* __restrict__ reduction_helper_max = + static_cast((*reduction_helper_array)) + default_dot_dim * (default_dot_dim + 1); rc_vtype local_res = zero(); rc_vtype local_max = zero(); @@ -417,12 +417,12 @@ void multinorm2_inf_kernel( group::this_thread_block(item_ct1)); const auto sum = ::gko::kernels::dpcpp::reduce( tile_block, local_res, - [](const rc_vtype &a, const rc_vtype &b) { return a + b; }); + [](const rc_vtype& a, const rc_vtype& b) { return a + b; }); rc_vtype reduced_max{}; if (compute_inf) { local_max = reduction_helper_max[tidy * (default_dot_dim + 1) + tidx]; reduced_max = ::gko::kernels::dpcpp::reduce( - tile_block, local_max, [](const rc_vtype &a, const rc_vtype &b) { + tile_block, local_max, [](const rc_vtype& a, const rc_vtype& b) { return ((a >= b) ? 
a : b); }); } @@ -439,16 +439,16 @@ void multinorm2_inf_kernel( template void multinorm2_inf_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, - size_type num_rows, size_type num_cols, const ValueType *next_krylov_basis, - size_type stride_next_krylov, remove_complex *norms1, - remove_complex *norms2, const stopping_status *stop_status) + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, + size_type num_rows, size_type num_cols, const ValueType* next_krylov_basis, + size_type stride_next_krylov, remove_complex* norms1, + remove_complex* norms2, const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor< UninitializedArray, (1 + compute_inf) * - default_dot_dim *(default_dot_dim + 1)>, + default_dot_dim*(default_dot_dim + 1)>, 0, sycl::access_mode::read_write, sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); @@ -466,11 +466,11 @@ void multinorm2_inf_kernel( template void multidot_kernel( size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, + const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, - UninitializedArray &reduction_helper_array) + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray& reduction_helper_array) { /* * In general in this kernel: @@ -497,7 +497,7 @@ void multidot_kernel( const size_type k = item_ct1.get_group(0); // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* 
__restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { @@ -519,7 +519,7 @@ void multidot_kernel( const auto tile_block = group::tiled_partition(thread_block); const auto sum = ::gko::kernels::dpcpp::reduce( tile_block, local_res, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0 && new_col_idx < num_cols && !stop_status[new_col_idx].has_stopped()) { const auto hessenberg_idx = k * stride_hessenberg + new_col_idx; @@ -529,14 +529,14 @@ void multidot_kernel( template void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, size_type num_cols, - const ValueType *next_krylov_basis, + sycl::queue* queue, size_type num_rows, size_type num_cols, + const ValueType* next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, ValueType *hessenberg_iter, + const Accessor3d krylov_bases, ValueType* hessenberg_iter, size_type stride_hessenberg, - const stopping_status *stop_status) + const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -556,11 +556,11 @@ void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void singledot_kernel( - size_type num_rows, const ValueType *__restrict__ next_krylov_basis, + size_type num_rows, const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, - UninitializedArray &reduction_helper_array) + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ 
stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray& reduction_helper_array) { /* * In general in this kernel: @@ -583,7 +583,7 @@ void singledot_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (!stop_status[col_idx].has_stopped()) { @@ -600,7 +600,7 @@ void singledot_kernel( thread_block.sync(); ::gko::kernels::dpcpp::reduce( thread_block, reduction_helper, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0 && !stop_status[col_idx].has_stopped()) { const auto hessenberg_idx = k * stride_hessenberg + col_idx; atomic_add(hessenberg_iter + hessenberg_idx, reduction_helper[0]); @@ -609,14 +609,14 @@ void singledot_kernel( template void singledot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, - const ValueType *next_krylov_basis, + sycl::queue* queue, size_type num_rows, + const ValueType* next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, ValueType *hessenberg_iter, + const Accessor3d krylov_bases, ValueType* hessenberg_iter, size_type stride_hessenberg, - const stopping_status *stop_status) + const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -639,10 +639,10 @@ void singledot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void update_next_krylov_kernel( size_type num_iters, size_type num_rows, size_type num_cols, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, + ValueType* __restrict__ next_krylov_basis, 
size_type stride_next_krylov, const Accessor3d krylov_bases, - const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_next_krylov; @@ -664,13 +664,13 @@ void update_next_krylov_kernel( template void update_next_krylov_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, size_type num_iters, size_type num_rows, size_type num_cols, - ValueType *next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, const ValueType *hessenberg_iter, - size_type stride_hessenberg, const stopping_status *stop_status) + ValueType* next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, const ValueType* hessenberg_iter, + size_type stride_hessenberg, const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_next_krylov_kernel( @@ -687,11 +687,11 @@ void update_next_krylov_kernel( template void update_next_krylov_and_add_kernel( size_type num_iters, size_type num_rows, size_type num_cols, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, ValueType *__restrict__ hessenberg_iter, - size_type stride_hessenberg, const ValueType *__restrict__ buffer_iter, - size_type stride_buffer, const stopping_status *__restrict__ stop_status, - const stopping_status *__restrict__ reorth_status, + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, + const Accessor3d 
krylov_bases, ValueType* __restrict__ hessenberg_iter, + size_type stride_hessenberg, const ValueType* __restrict__ buffer_iter, + size_type stride_buffer, const stopping_status* __restrict__ stop_status, + const stopping_status* __restrict__ reorth_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -718,15 +718,15 @@ void update_next_krylov_and_add_kernel( template void update_next_krylov_and_add_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, size_type num_iters, size_type num_rows, size_type num_cols, - ValueType *next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, ValueType *hessenberg_iter, - size_type stride_hessenberg, const ValueType *buffer_iter, - size_type stride_buffer, const stopping_status *stop_status, - const stopping_status *reorth_status) + ValueType* next_krylov_basis, size_type stride_next_krylov, + const Accessor3d krylov_bases, ValueType* hessenberg_iter, + size_type stride_hessenberg, const ValueType* buffer_iter, + size_type stride_buffer, const stopping_status* stop_status, + const stopping_status* reorth_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_next_krylov_and_add_kernel( @@ -742,12 +742,12 @@ void update_next_krylov_and_add_kernel( // Must be called with at least `num_rhs` threads template void check_arnoldi_norms( - size_type num_rhs, remove_complex *__restrict__ arnoldi_norm, - size_type stride_norm, ValueType *__restrict__ hessenberg_iter, + size_type num_rhs, remove_complex* __restrict__ arnoldi_norm, + size_type stride_norm, ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, size_type iter, Accessor3d krylov_bases, - const stopping_status *__restrict__ stop_status, - stopping_status 
*__restrict__ reorth_status, - size_type *__restrict__ num_reorth, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, + stopping_status* __restrict__ reorth_status, + size_type* __restrict__ num_reorth, sycl::nd_item<3> item_ct1) { const remove_complex eta_squared = 1.0 / 2.0; const auto col_idx = thread::get_thread_id_flat(item_ct1); @@ -775,15 +775,15 @@ void check_arnoldi_norms( template void check_arnoldi_norms(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rhs, - remove_complex *arnoldi_norm, - size_type stride_norm, ValueType *hessenberg_iter, + sycl::queue* queue, size_type num_rhs, + remove_complex* arnoldi_norm, + size_type stride_norm, ValueType* hessenberg_iter, size_type stride_hessenberg, size_type iter, Accessor3d krylov_bases, - const stopping_status *stop_status, - stopping_status *reorth_status, size_type *num_reorth) + const stopping_status* stop_status, + stopping_status* reorth_status, size_type* num_reorth) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { check_arnoldi_norms( @@ -797,9 +797,9 @@ void check_arnoldi_norms(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void set_scalar_kernel(size_type num_rhs, size_type num_blocks, - const RealValueType *__restrict__ residual_norm, + const RealValueType* __restrict__ residual_norm, size_type stride_residual, - const RealValueType *__restrict__ arnoldi_inf, + const RealValueType* __restrict__ arnoldi_inf, size_type stride_inf, Accessor3d krylov_bases, sycl::nd_item<3> item_ct1) { @@ -828,13 +828,13 @@ void set_scalar_kernel(size_type num_rhs, size_type num_blocks, template void set_scalar_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rhs, - size_type num_blocks, const RealValueType *residual_norm, + sycl::queue* queue, size_type num_rhs, + 
size_type num_blocks, const RealValueType* residual_norm, size_type stride_residual, - const RealValueType *arnoldi_inf, size_type stride_inf, + const RealValueType* arnoldi_inf, size_type stride_inf, Accessor3d krylov_bases) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { set_scalar_kernel( @@ -850,10 +850,10 @@ void set_scalar_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void update_krylov_next_krylov_kernel( size_type iter, size_type num_rows, size_type num_cols, - ValueType *__restrict__ next_krylov_basis, size_type stride_next_krylov, - Accessor3d krylov_bases, const ValueType *__restrict__ hessenberg_iter, + ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, + Accessor3d krylov_bases, const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_next_krylov; @@ -875,13 +875,13 @@ void update_krylov_next_krylov_kernel( template void update_krylov_next_krylov_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, size_type iter, size_type num_rows, size_type num_cols, - ValueType *next_krylov_basis, size_type stride_next_krylov, - Accessor3d krylov_bases, const ValueType *hessenberg_iter, - size_type stride_hessenberg, const stopping_status *stop_status) + ValueType* next_krylov_basis, size_type stride_next_krylov, + Accessor3d krylov_bases, const ValueType* hessenberg_iter, + size_type stride_hessenberg, const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) 
{ cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_krylov_next_krylov_kernel( @@ -898,10 +898,10 @@ void update_krylov_next_krylov_kernel( template void calculate_Qy_kernel(size_type num_rows, size_type num_cols, const Accessor3d krylov_bases, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ before_preconditioner, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ before_preconditioner, size_type stride_preconditioner, - const size_type *__restrict__ final_iter_nums, + const size_type* __restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -919,14 +919,14 @@ void calculate_Qy_kernel(size_type num_rows, size_type num_cols, template void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, + sycl::queue* queue, size_type num_rows, size_type num_cols, const Accessor3d krylov_bases, - const ValueType *y, size_type stride_y, - ValueType *before_preconditioner, + const ValueType* y, size_type stride_y, + ValueType* before_preconditioner, size_type stride_preconditioner, - const size_type *final_iter_nums) + const size_type* final_iter_nums) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { calculate_Qy_kernel( @@ -941,7 +941,7 @@ void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, // Specialization, so the Accessor can use the same function as regular pointers template GKO_INLINE auto as_dpcpp_accessor( - const acc::range> &acc) + const acc::range>& acc) { return acc::range>( acc.get_accessor().get_size(), acc.get_accessor().get_stored_data(), @@ -950,8 +950,8 @@ GKO_INLINE auto as_dpcpp_accessor( template GKO_INLINE auto as_dpcpp_accessor( - const acc::range> - &acc) + const acc::range>& + acc) { 
return acc::range>( acc.get_accessor().get_size(), acc.get_accessor().get_stored_data(), @@ -963,7 +963,7 @@ GKO_INLINE auto as_dpcpp_accessor( template void zero_matrix(std::shared_ptr exec, size_type m, - size_type n, size_type stride, ValueType *array) + size_type n, size_type stride, ValueType* array) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); @@ -974,11 +974,11 @@ void zero_matrix(std::shared_ptr exec, size_type m, template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), krylov_dim * b->get_size()[1]); @@ -1000,13 +1000,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense> *arnoldi_norm, + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense>* arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense *next_krylov_basis, - Array *final_iter_nums, size_type krylov_dim) + matrix::Dense* next_krylov_basis, + Array* final_iter_nums, size_type krylov_dim) { constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; @@ -1067,14 +1067,14 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( template void finish_arnoldi_CGS(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, + matrix::Dense* next_krylov_basis, Accessor3dim krylov_bases, - matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, const 
stopping_status *stop_status, - stopping_status *reorth_status, - Array *num_reorth) + matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, const stopping_status* stop_status, + stopping_status* reorth_status, + Array* num_reorth) { using non_complex = remove_complex; // optimization parameter @@ -1225,12 +1225,12 @@ void finish_arnoldi_CGS(std::shared_ptr exec, template void givens_rotation(std::shared_ptr exec, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - size_type iter, const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + size_type iter, const Array* stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -1252,17 +1252,17 @@ void givens_rotation(std::shared_ptr exec, template void step_1(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, Array *final_iter_nums, - const Array *stop_status, - Array *reorth_status, Array *num_reorth) + matrix::Dense* next_krylov_basis, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, Array* final_iter_nums, + const Array* stop_status, + Array* reorth_status, Array* num_reorth) { increase_final_iteration_numbers_kernel( static_cast( @@ -1283,9 +1283,9 @@ 
GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); template void solve_upper_triangular( std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const Array* final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -1306,9 +1306,9 @@ void solve_upper_triangular( template void calculate_qy(std::shared_ptr exec, ConstAccessor3d krylov_bases, size_type num_krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { const auto num_rows = before_preconditioner->get_size()[0]; const auto num_cols = before_preconditioner->get_size()[1]; @@ -1335,12 +1335,12 @@ void calculate_qy(std::shared_ptr exec, template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, + const matrix::Dense* residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { // since hessenberg has dims: iters x iters * num_rhs // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs diff --git a/dpcpp/solver/gmres_kernels.dp.cpp b/dpcpp/solver/gmres_kernels.dp.cpp index 55369b09e9a..ae8516e75a5 100644 --- a/dpcpp/solver/gmres_kernels.dp.cpp +++ b/dpcpp/solver/gmres_kernels.dp.cpp @@ -80,11 +80,11 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; template void initialize_2_2_kernel( size_type num_rows, size_type num_rhs, - const ValueType *__restrict__ residual, 
size_type stride_residual, - const remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ residual_norm_collection, - ValueType *__restrict__ krylov_bases, size_type stride_krylov, - size_type *__restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ residual, size_type stride_residual, + const remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ residual_norm_collection, + ValueType* __restrict__ krylov_bases, size_type stride_krylov, + size_type* __restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / num_rhs; @@ -104,15 +104,15 @@ void initialize_2_2_kernel( template void initialize_2_2_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue *queue, + size_type dynamic_shared_memory, sycl::queue* queue, size_type num_rows, size_type num_rhs, - const ValueType *residual, size_type stride_residual, - const remove_complex *residual_norm, - ValueType *residual_norm_collection, - ValueType *krylov_bases, size_type stride_krylov, - size_type *final_iter_nums) + const ValueType* residual, size_type stride_residual, + const remove_complex* residual_norm, + ValueType* residual_norm_collection, + ValueType* krylov_bases, size_type stride_krylov, + size_type* final_iter_nums) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { initialize_2_2_kernel( @@ -125,8 +125,8 @@ void initialize_2_2_kernel(dim3 grid, dim3 block, void increase_final_iteration_numbers_kernel( - size_type *__restrict__ final_iter_nums, - const stopping_status *__restrict__ stop_status, size_type total_number, + size_type* __restrict__ final_iter_nums, + const stopping_status* __restrict__ stop_status, size_type total_number, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); 
@@ -142,12 +142,12 @@ GKO_ENABLE_DEFAULT_HOST(increase_final_iteration_numbers_kernel, template void multidot_kernel( size_type k, size_type num_rows, size_type num_cols, - const ValueType *__restrict__ krylov_bases, - const ValueType *__restrict__ next_krylov_basis, size_type stride_krylov, - ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, - UninitializedArray - *reduction_helper_array) + const ValueType* __restrict__ krylov_bases, + const ValueType* __restrict__ next_krylov_basis, size_type stride_krylov, + ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray* + reduction_helper_array) { const auto tidx = item_ct1.get_local_id(2); const auto tidy = item_ct1.get_local_id(1); @@ -160,7 +160,7 @@ void multidot_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - ValueType *__restrict__ reduction_helper = (*reduction_helper_array); + ValueType* __restrict__ reduction_helper = (*reduction_helper_array); ValueType local_res = zero(); if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { @@ -178,7 +178,7 @@ void multidot_kernel( group::this_thread_block(item_ct1)); const auto sum = ::gko::kernels::dpcpp::reduce( tile_block, local_res, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; if (tidx == 0 && new_col_idx < num_cols && !stop_status[new_col_idx].has_stopped()) { @@ -189,15 +189,15 @@ void multidot_kernel( template void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type k, size_type num_rows, - size_type num_cols, const ValueType *krylov_bases, 
- const ValueType *next_krylov_basis, - size_type stride_krylov, ValueType *hessenberg_iter, + sycl::queue* queue, size_type k, size_type num_rows, + size_type num_cols, const ValueType* krylov_bases, + const ValueType* next_krylov_basis, + size_type stride_krylov, ValueType* hessenberg_iter, size_type stride_hessenberg, - const stopping_status *stop_status) + const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { - sycl::accessorsubmit([&](sycl::handler& cgh) { + sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -209,8 +209,8 @@ void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, k, num_rows, num_cols, krylov_bases, next_krylov_basis, stride_krylov, hessenberg_iter, stride_hessenberg, stop_status, item_ct1, - (UninitializedArray *) + (UninitializedArray*) reduction_helper_array_acc_ct1.get_pointer()); }); }); @@ -222,10 +222,10 @@ void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void update_next_krylov_kernel( size_type k, size_type num_rows, size_type num_cols, - const ValueType *__restrict__ krylov_bases, - ValueType *__restrict__ next_krylov_basis, size_type stride_krylov, - const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ krylov_bases, + ValueType* __restrict__ next_krylov_basis, size_type stride_krylov, + const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_krylov; @@ -244,13 +244,13 @@ void update_next_krylov_kernel( template void update_next_krylov_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* 
queue, size_type k, size_type num_rows, size_type num_cols, - const ValueType *krylov_bases, ValueType *next_krylov_basis, - size_type stride_krylov, const ValueType *hessenberg_iter, - size_type stride_hessenberg, const stopping_status *stop_status) + const ValueType* krylov_bases, ValueType* next_krylov_basis, + size_type stride_krylov, const ValueType* hessenberg_iter, + size_type stride_hessenberg, const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_next_krylov_kernel( @@ -267,11 +267,11 @@ void update_next_krylov_kernel( template void update_hessenberg_2_kernel( size_type iter, size_type num_rows, size_type num_cols, - const ValueType *__restrict__ next_krylov_basis, - size_type stride_next_krylov, ValueType *__restrict__ hessenberg_iter, + const ValueType* __restrict__ next_krylov_basis, + size_type stride_next_krylov, ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, - UninitializedArray &reduction_helper_array) + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray& reduction_helper_array) { const auto tidx = item_ct1.get_local_id(2); const auto col_idx = item_ct1.get_group(2); @@ -279,7 +279,7 @@ void update_hessenberg_2_kernel( // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; if (col_idx < num_cols && !stop_status[col_idx].has_stopped()) { ValueType local_res{}; @@ -294,7 +294,7 @@ void update_hessenberg_2_kernel( // Perform thread block reduction. 
Result is in reduction_helper[0] reduce(group::this_thread_block(item_ct1), reduction_helper, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0) { hessenberg_iter[(iter + 1) * stride_hessenberg + col_idx] = @@ -305,13 +305,13 @@ void update_hessenberg_2_kernel( template void update_hessenberg_2_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, size_type iter, size_type num_rows, size_type num_cols, - const ValueType *next_krylov_basis, size_type stride_next_krylov, - ValueType *hessenberg_iter, size_type stride_hessenberg, - const stopping_status *stop_status) + const ValueType* next_krylov_basis, size_type stride_next_krylov, + ValueType* hessenberg_iter, size_type stride_hessenberg, + const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -334,9 +334,9 @@ void update_hessenberg_2_kernel( template void update_krylov_kernel( size_type iter, size_type num_rows, size_type num_cols, - ValueType *__restrict__ krylov_bases, size_type stride_krylov, - const ValueType *__restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ krylov_bases, size_type stride_krylov, + const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_krylov; @@ -354,15 +354,15 @@ void update_krylov_kernel( template void update_krylov_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue *queue, + size_type dynamic_shared_memory, 
sycl::queue* queue, size_type iter, size_type num_rows, - size_type num_cols, ValueType *krylov_bases, + size_type num_cols, ValueType* krylov_bases, size_type stride_krylov, - const ValueType *hessenberg_iter, + const ValueType* hessenberg_iter, size_type stride_hessenberg, - const stopping_status *stop_status) + const stopping_status* stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_krylov_kernel( @@ -378,12 +378,12 @@ void update_krylov_kernel(dim3 grid, dim3 block, template void calculate_Qy_kernel(size_type num_rows, size_type num_cols, size_type num_rhs, - const ValueType *__restrict__ krylov_bases, + const ValueType* __restrict__ krylov_bases, size_type stride_krylov, - const ValueType *__restrict__ y, size_type stride_y, - ValueType *__restrict__ before_preconditioner, + const ValueType* __restrict__ y, size_type stride_y, + ValueType* __restrict__ before_preconditioner, size_type stride_preconditioner, - const size_type *__restrict__ final_iter_nums, + const size_type* __restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -404,15 +404,15 @@ void calculate_Qy_kernel(size_type num_rows, size_type num_cols, template void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, + sycl::queue* queue, size_type num_rows, size_type num_cols, size_type num_rhs, - const ValueType *krylov_bases, size_type stride_krylov, - const ValueType *y, size_type stride_y, - ValueType *before_preconditioner, + const ValueType* krylov_bases, size_type stride_krylov, + const ValueType* y, size_type stride_y, + ValueType* before_preconditioner, size_type stride_preconditioner, - const size_type *final_iter_nums) + const size_type* final_iter_nums) { - queue->submit([&](sycl::handler &cgh) { + 
queue->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { calculate_Qy_kernel( @@ -426,11 +426,11 @@ void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), krylov_dim * b->get_size()[1]); @@ -452,11 +452,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - Array *final_iter_nums, size_type krylov_dim) + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + Array* final_iter_nums, size_type krylov_dim) { const auto num_rows = residual->get_size()[0]; const auto num_rhs = residual->get_size()[1]; @@ -484,9 +484,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL); template void finish_arnoldi(std::shared_ptr exec, - size_type num_rows, matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) + size_type num_rows, matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { const auto stride_krylov = krylov_bases->get_stride(); const auto stride_hessenberg = hessenberg_iter->get_stride(); @@ -550,12 +550,12 @@ void finish_arnoldi(std::shared_ptr exec, template void givens_rotation(std::shared_ptr exec, - matrix::Dense 
*givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - size_type iter, const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + size_type iter, const Array* stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -577,14 +577,14 @@ void givens_rotation(std::shared_ptr exec, template void step_1(std::shared_ptr exec, size_type num_rows, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - Array *final_iter_nums, - const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + Array* final_iter_nums, + const Array* stop_status) { increase_final_iteration_numbers_kernel( static_cast( @@ -603,9 +603,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_1_KERNEL); template void solve_upper_triangular( std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const Array* final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -625,10 +625,10 @@ void solve_upper_triangular( template void calculate_qy(std::shared_ptr exec, - const matrix::Dense *krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const 
matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { const auto num_rows = before_preconditioner->get_size()[0]; const auto num_cols = krylov_bases->get_size()[1]; @@ -657,12 +657,12 @@ void calculate_qy(std::shared_ptr exec, template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* krylov_bases, + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(exec, residual_norm_collection, hessenberg, y, final_iter_nums); diff --git a/dpcpp/solver/idr_kernels.dp.cpp b/dpcpp/solver/idr_kernels.dp.cpp index a4f18019128..72948de76be 100644 --- a/dpcpp/solver/idr_kernels.dp.cpp +++ b/dpcpp/solver/idr_kernels.dp.cpp @@ -73,8 +73,8 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; template void initialize_m_kernel(size_type subspace_dim, size_type nrhs, - ValueType *__restrict__ m_values, size_type m_stride, - stopping_status *__restrict__ stop_status, + ValueType* __restrict__ m_values, size_type m_stride, + stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -93,11 +93,11 @@ void initialize_m_kernel(size_type subspace_dim, size_type nrhs, template void initialize_m_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue *stream, size_type subspace_dim, - size_type nrhs, ValueType *m_values, - size_type m_stride, stopping_status *stop_status) + sycl::queue* stream, size_type subspace_dim, + size_type nrhs, ValueType* m_values, + size_type m_stride, stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { 
+ stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { initialize_m_kernel(subspace_dim, nrhs, m_values, m_stride, @@ -109,16 +109,16 @@ void initialize_m_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, template void orthonormalize_subspace_vectors_kernel( - size_type num_rows, size_type num_cols, ValueType *__restrict__ values, + size_type num_rows, size_type num_cols, ValueType* __restrict__ values, size_type stride, sycl::nd_item<3> item_ct1, - UninitializedArray &reduction_helper_array) + UninitializedArray& reduction_helper_array) { const auto tidx = thread::get_thread_id_flat(item_ct1); // they are not be used in the same time. - ValueType *reduction_helper = reduction_helper_array; + ValueType* reduction_helper = reduction_helper_array; auto reduction_helper_real = - reinterpret_cast *>(reduction_helper); + reinterpret_cast*>(reduction_helper); for (size_type row = 0; row < num_rows; row++) { for (size_type i = 0; i < row; i++) { @@ -132,7 +132,7 @@ void orthonormalize_subspace_vectors_kernel( reduction_helper[tidx] = dot; ::gko::kernels::dpcpp::reduce( group::this_thread_block(item_ct1), reduction_helper, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); item_ct1.barrier(sycl::access::fence_space::local_space); dot = reduction_helper[0]; @@ -151,8 +151,8 @@ void orthonormalize_subspace_vectors_kernel( reduction_helper_real[tidx] = norm; ::gko::kernels::dpcpp::reduce( group::this_thread_block(item_ct1), reduction_helper_real, - [](const remove_complex &a, - const remove_complex &b) { return a + b; }); + [](const remove_complex& a, + const remove_complex& b) { return a + b; }); item_ct1.barrier(sycl::access::fence_space::local_space); norm = std::sqrt(reduction_helper_real[0]); @@ -164,10 +164,10 @@ void orthonormalize_subspace_vectors_kernel( template void orthonormalize_subspace_vectors_kernel( - dim3 
grid, dim3 block, size_t dynamic_shared_memory, sycl::queue *stream, - size_type num_rows, size_type num_cols, ValueType *values, size_type stride) + dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue* stream, + size_type num_rows, size_type num_cols, ValueType* values, size_type stride) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -186,10 +186,10 @@ void orthonormalize_subspace_vectors_kernel( template void solve_lower_triangular_kernel( size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ m_values, size_type m_stride, - const ValueType *__restrict__ f_values, size_type f_stride, - ValueType *__restrict__ c_values, size_type c_stride, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ m_values, size_type m_stride, + const ValueType* __restrict__ f_values, size_type f_stride, + ValueType* __restrict__ c_values, size_type c_stride, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -212,12 +212,12 @@ void solve_lower_triangular_kernel( template void solve_lower_triangular_kernel( - dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue *stream, - size_type subspace_dim, size_type nrhs, const ValueType *m_values, - size_type m_stride, const ValueType *f_values, size_type f_stride, - ValueType *c_values, size_type c_stride, const stopping_status *stop_status) + dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue* stream, + size_type subspace_dim, size_type nrhs, const ValueType* m_values, + size_type m_stride, const ValueType* f_values, size_type f_stride, + ValueType* c_values, size_type c_stride, const stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( 
sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { solve_lower_triangular_kernel( @@ -231,12 +231,12 @@ void solve_lower_triangular_kernel( template void step_1_kernel(size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ residual_values, + const ValueType* __restrict__ residual_values, size_type residual_stride, - const ValueType *__restrict__ c_values, size_type c_stride, - const ValueType *__restrict__ g_values, size_type g_stride, - ValueType *__restrict__ v_values, size_type v_stride, - const stopping_status *__restrict__ stop_status, + const ValueType* __restrict__ c_values, size_type c_stride, + const ValueType* __restrict__ g_values, size_type g_stride, + ValueType* __restrict__ v_values, size_type v_stride, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -259,15 +259,15 @@ void step_1_kernel(size_type k, size_type num_rows, size_type subspace_dim, template void step_1_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue *stream, size_type k, size_type num_rows, + sycl::queue* stream, size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, - const ValueType *residual_values, size_type residual_stride, - const ValueType *c_values, size_type c_stride, - const ValueType *g_values, size_type g_stride, - ValueType *v_values, size_type v_stride, - const stopping_status *stop_status) + const ValueType* residual_values, size_type residual_stride, + const ValueType* c_values, size_type c_stride, + const ValueType* g_values, size_type g_stride, + ValueType* v_values, size_type v_stride, + const stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { step_1_kernel(k, num_rows, subspace_dim, nrhs, residual_values, @@ -281,11 +281,11 @@ 
void step_1_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, template void step_2_kernel(size_type k, size_type num_rows, size_type subspace_dim, - size_type nrhs, const ValueType *__restrict__ omega_values, - const ValueType *__restrict__ v_values, size_type v_stride, - const ValueType *__restrict__ c_values, size_type c_stride, - ValueType *__restrict__ u_values, size_type u_stride, - const stopping_status *__restrict__ stop_status, + size_type nrhs, const ValueType* __restrict__ omega_values, + const ValueType* __restrict__ v_values, size_type v_stride, + const ValueType* __restrict__ c_values, size_type c_stride, + ValueType* __restrict__ u_values, size_type u_stride, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -308,14 +308,14 @@ void step_2_kernel(size_type k, size_type num_rows, size_type subspace_dim, template void step_2_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue *stream, size_type k, size_type num_rows, + sycl::queue* stream, size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, - const ValueType *omega_values, const ValueType *v_values, - size_type v_stride, const ValueType *c_values, - size_type c_stride, ValueType *u_values, size_type u_stride, - const stopping_status *stop_status) + const ValueType* omega_values, const ValueType* v_values, + size_type v_stride, const ValueType* c_values, + size_type c_stride, ValueType* u_values, size_type u_stride, + const stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { step_2_kernel(k, num_rows, subspace_dim, nrhs, omega_values, @@ -328,12 +328,12 @@ void step_2_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, template void multidot_kernel( - size_type num_rows, size_type nrhs, const ValueType 
*__restrict__ p_i, - const ValueType *__restrict__ g_k, size_type g_k_stride, - ValueType *__restrict__ alpha, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1, - UninitializedArray - &reduction_helper_array) + size_type num_rows, size_type nrhs, const ValueType* __restrict__ p_i, + const ValueType* __restrict__ g_k, size_type g_k_stride, + ValueType* __restrict__ alpha, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + UninitializedArray& + reduction_helper_array) { const auto tidx = item_ct1.get_local_id(2); const auto tidy = item_ct1.get_local_id(1); @@ -345,7 +345,7 @@ void multidot_kernel( : (item_ct1.get_group(1) + 1) * num; // Used that way to get around dynamic initialization warning and // template error when using `reduction_helper_array` directly in `reduce` - ValueType *__restrict__ reduction_helper = reduction_helper_array; + ValueType* __restrict__ reduction_helper = reduction_helper_array; ValueType local_res = zero(); if (rhs < nrhs && !stop_status[rhs].has_stopped()) { @@ -362,7 +362,7 @@ void multidot_kernel( group::this_thread_block(item_ct1)); const auto sum = ::gko::kernels::dpcpp::reduce( tile_block, local_res, - [](const ValueType &a, const ValueType &b) { return a + b; }); + [](const ValueType& a, const ValueType& b) { return a + b; }); const auto new_rhs = item_ct1.get_group(2) * default_dot_dim + tidy; if (tidx == 0 && new_rhs < nrhs && !stop_status[new_rhs].has_stopped()) { atomic_add(alpha + new_rhs, sum); @@ -371,13 +371,13 @@ void multidot_kernel( template void multidot_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue *stream, size_type num_rows, size_type nrhs, - const ValueType *p_i, const ValueType *g_k, - size_type g_k_stride, ValueType *alpha, - const stopping_status *stop_status) + sycl::queue* stream, size_type num_rows, size_type nrhs, + const ValueType* p_i, const ValueType* g_k, + size_type g_k_stride, ValueType* alpha, + const stopping_status* 
stop_status) { - stream->submit([&](sycl::handler &cgh) { - sycl::accessorsubmit([&](sycl::handler& cgh) { + sycl::accessor, 0, sycl::access_mode::read_write, sycl::access::target::local> @@ -396,11 +396,11 @@ void multidot_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, template void update_g_k_and_u_kernel( size_type k, size_type i, size_type size, size_type nrhs, - const ValueType *__restrict__ alpha, const ValueType *__restrict__ m_values, - size_type m_stride, const ValueType *__restrict__ g_values, - size_type g_stride, ValueType *__restrict__ g_k_values, - size_type g_k_stride, ValueType *__restrict__ u_values, size_type u_stride, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ alpha, const ValueType* __restrict__ m_values, + size_type m_stride, const ValueType* __restrict__ g_values, + size_type g_stride, ValueType* __restrict__ g_k_values, + size_type g_k_stride, ValueType* __restrict__ u_values, size_type u_stride, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); const auto row = tidx / g_k_stride; @@ -421,16 +421,16 @@ void update_g_k_and_u_kernel( template void update_g_k_and_u_kernel(dim3 grid, dim3 block, - size_t dynamic_shared_memory, sycl::queue *stream, + size_t dynamic_shared_memory, sycl::queue* stream, size_type k, size_type i, size_type size, - size_type nrhs, const ValueType *alpha, - const ValueType *m_values, size_type m_stride, - const ValueType *g_values, size_type g_stride, - ValueType *g_k_values, size_type g_k_stride, - ValueType *u_values, size_type u_stride, - const stopping_status *stop_status) -{ - stream->submit([&](sycl::handler &cgh) { + size_type nrhs, const ValueType* alpha, + const ValueType* m_values, size_type m_stride, + const ValueType* g_values, size_type g_stride, + ValueType* g_k_values, size_type g_k_stride, + ValueType* u_values, size_type u_stride, + const 
stopping_status* stop_status) +{ + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_g_k_and_u_kernel( @@ -444,10 +444,10 @@ void update_g_k_and_u_kernel(dim3 grid, dim3 block, template void update_g_kernel(size_type k, size_type size, size_type nrhs, - const ValueType *__restrict__ g_k_values, - size_type g_k_stride, ValueType *__restrict__ g_values, + const ValueType* __restrict__ g_k_values, + size_type g_k_stride, ValueType* __restrict__ g_values, size_type g_stride, - const stopping_status *__restrict__ stop_status, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -466,12 +466,12 @@ void update_g_kernel(size_type k, size_type size, size_type nrhs, template void update_g_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue *stream, size_type k, size_type size, - size_type nrhs, const ValueType *g_k_values, - size_type g_k_stride, ValueType *g_values, - size_type g_stride, const stopping_status *stop_status) + sycl::queue* stream, size_type k, size_type size, + size_type nrhs, const ValueType* g_k_values, + size_type g_k_stride, ValueType* g_values, + size_type g_stride, const stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_g_kernel(k, size, nrhs, g_k_values, @@ -485,13 +485,13 @@ void update_g_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, template void update_x_r_and_f_kernel( size_type k, size_type size, size_type subspace_dim, size_type nrhs, - const ValueType *__restrict__ m_values, size_type m_stride, - const ValueType *__restrict__ g_values, size_type g_stride, - const ValueType *__restrict__ u_values, size_type u_stride, - ValueType *__restrict__ f_values, size_type f_stride, - ValueType *__restrict__ 
r_values, size_type r_stride, - ValueType *__restrict__ x_values, size_type x_stride, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ m_values, size_type m_stride, + const ValueType* __restrict__ g_values, size_type g_stride, + const ValueType* __restrict__ u_values, size_type u_stride, + ValueType* __restrict__ f_values, size_type f_stride, + ValueType* __restrict__ r_values, size_type r_stride, + ValueType* __restrict__ x_values, size_type x_stride, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row = global_id / x_stride; @@ -518,15 +518,15 @@ void update_x_r_and_f_kernel( template void update_x_r_and_f_kernel( - dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue *stream, + dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue* stream, size_type k, size_type size, size_type subspace_dim, size_type nrhs, - const ValueType *m_values, size_type m_stride, const ValueType *g_values, - size_type g_stride, const ValueType *u_values, size_type u_stride, - ValueType *f_values, size_type f_stride, ValueType *r_values, - size_type r_stride, ValueType *x_values, size_type x_stride, - const stopping_status *stop_status) + const ValueType* m_values, size_type m_stride, const ValueType* g_values, + size_type g_stride, const ValueType* u_values, size_type u_stride, + ValueType* f_values, size_type f_stride, ValueType* r_values, + size_type r_stride, ValueType* x_values, size_type x_stride, + const stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { update_x_r_and_f_kernel( @@ -541,10 +541,10 @@ void update_x_r_and_f_kernel( template void compute_omega_kernel( size_type nrhs, const remove_complex kappa, - const ValueType *__restrict__ tht, - const 
remove_complex *__restrict__ residual_norm, - ValueType *__restrict__ omega, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const ValueType* __restrict__ tht, + const remove_complex* __restrict__ residual_norm, + ValueType* __restrict__ omega, + const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -566,13 +566,13 @@ void compute_omega_kernel( template void compute_omega_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue *stream, size_type nrhs, + sycl::queue* stream, size_type nrhs, const remove_complex kappa, - const ValueType *tht, - const remove_complex *residual_norm, - ValueType *omega, const stopping_status *stop_status) + const ValueType* tht, + const remove_complex* residual_norm, + ValueType* omega, const stopping_status* stop_status) { - stream->submit([&](sycl::handler &cgh) { + stream->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { compute_omega_kernel(nrhs, kappa, tht, residual_norm, omega, @@ -587,8 +587,8 @@ namespace { template void initialize_m(std::shared_ptr exec, - const size_type nrhs, matrix::Dense *m, - Array *stop_status) + const size_type nrhs, matrix::Dense* m, + Array* stop_status) { const auto subspace_dim = m->get_size()[0]; const auto m_stride = m->get_stride(); @@ -602,7 +602,7 @@ void initialize_m(std::shared_ptr exec, template void initialize_subspace_vectors(std::shared_ptr exec, - matrix::Dense *subspace_vectors, + matrix::Dense* subspace_vectors, bool deterministic) { if (deterministic) { @@ -613,12 +613,12 @@ void initialize_subspace_vectors(std::shared_ptr exec, subspace_vectors->read(subspace_vectors_data); } else { auto seed = time(NULL); - auto work = reinterpret_cast *>( + auto work = reinterpret_cast*>( subspace_vectors->get_values()); auto n = subspace_vectors->get_size()[0] * subspace_vectors->get_stride(); n = 
is_complex() ? 2 * n : n; - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> idx) { std::uint64_t offset = idx.get_linear_id(); oneapi::dpl::minstd_rand engine(seed, offset); @@ -635,7 +635,7 @@ void initialize_subspace_vectors(std::shared_ptr exec, template void orthonormalize_subspace_vectors(std::shared_ptr exec, - matrix::Dense *subspace_vectors) + matrix::Dense* subspace_vectors) { orthonormalize_subspace_vectors_kernel( 1, default_block_size, 0, exec->get_queue(), @@ -647,10 +647,10 @@ void orthonormalize_subspace_vectors(std::shared_ptr exec, template void solve_lower_triangular(std::shared_ptr exec, const size_type nrhs, - const matrix::Dense *m, - const matrix::Dense *f, - matrix::Dense *c, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* f, + matrix::Dense* c, + const Array* stop_status) { const auto subspace_dim = m->get_size()[0]; @@ -666,12 +666,12 @@ void solve_lower_triangular(std::shared_ptr exec, template void update_g_and_u(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense *p, - const matrix::Dense *m, - matrix::Dense *alpha, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, - const Array *stop_status) + const matrix::Dense* p, + const matrix::Dense* m, + matrix::Dense* alpha, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, + const Array* stop_status) { const auto size = g->get_size()[0]; const auto p_stride = p->get_stride(); @@ -710,9 +710,9 @@ void update_g_and_u(std::shared_ptr exec, template void update_m(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - const matrix::Dense *g_k, matrix::Dense *m, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + const matrix::Dense* g_k, matrix::Dense* m, + const Array* stop_status) { const auto size = g_k->get_size()[0]; const auto subspace_dim = 
m->get_size()[0]; @@ -743,12 +743,12 @@ void update_m(std::shared_ptr exec, const size_type nrhs, template void update_x_r_and_f(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense *m, - const matrix::Dense *g, - const matrix::Dense *u, - matrix::Dense *f, matrix::Dense *r, - matrix::Dense *x, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* g, + const matrix::Dense* u, + matrix::Dense* f, matrix::Dense* r, + matrix::Dense* x, + const Array* stop_status) { const auto size = x->get_size()[0]; const auto subspace_dim = m->get_size()[0]; @@ -771,9 +771,9 @@ void update_x_r_and_f(std::shared_ptr exec, template void initialize(std::shared_ptr exec, const size_type nrhs, - matrix::Dense *m, - matrix::Dense *subspace_vectors, bool deterministic, - Array *stop_status) + matrix::Dense* m, + matrix::Dense* subspace_vectors, bool deterministic, + Array* stop_status) { initialize_m(exec, nrhs, m, stop_status); initialize_subspace_vectors(exec, subspace_vectors, deterministic); @@ -785,12 +785,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *m, - const matrix::Dense *f, - const matrix::Dense *residual, - const matrix::Dense *g, matrix::Dense *c, - matrix::Dense *v, - const Array *stop_status) + const size_type k, const matrix::Dense* m, + const matrix::Dense* f, + const matrix::Dense* residual, + const matrix::Dense* g, matrix::Dense* c, + matrix::Dense* v, + const Array* stop_status) { solve_lower_triangular(exec, nrhs, m, f, c, stop_status); @@ -811,10 +811,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *omega, - const matrix::Dense *preconditioned_vector, - const matrix::Dense *c, matrix::Dense *u, - const Array *stop_status) + const size_type k, 
const matrix::Dense* omega, + const matrix::Dense* preconditioned_vector, + const matrix::Dense* c, matrix::Dense* u, + const Array* stop_status) { const auto num_rows = preconditioned_vector->get_size()[0]; const auto subspace_dim = u->get_size()[1] / nrhs; @@ -833,12 +833,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, matrix::Dense *m, - matrix::Dense *f, matrix::Dense *alpha, - matrix::Dense *residual, matrix::Dense *x, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, matrix::Dense* m, + matrix::Dense* f, matrix::Dense* alpha, + matrix::Dense* residual, matrix::Dense* x, + const Array* stop_status) { update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); update_m(exec, nrhs, k, p, g_k, m, stop_status); @@ -851,9 +851,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense *tht, - const matrix::Dense> *residual_norm, - matrix::Dense *omega, const Array *stop_status) + const remove_complex kappa, const matrix::Dense* tht, + const matrix::Dense>* residual_norm, + matrix::Dense* omega, const Array* stop_status) { const auto grid_dim = ceildiv(nrhs, config::warp_size); compute_omega_kernel(grid_dim, config::warp_size, 0, exec->get_queue(), diff --git a/dpcpp/solver/lower_trs_kernels.dp.cpp b/dpcpp/solver/lower_trs_kernels.dp.cpp index 7144108593f..5f25d4c057b 100644 --- a/dpcpp/solver/lower_trs_kernels.dp.cpp +++ b/dpcpp/solver/lower_trs_kernels.dp.cpp @@ -60,11 +60,11 @@ namespace lower_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) GKO_NOT_IMPLEMENTED; + bool& do_transpose) GKO_NOT_IMPLEMENTED; void 
init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { // This init kernel is here to allow initialization of the solve struct for // a more sophisticated implementation as for other executors. @@ -73,8 +73,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { // This generate kernel is here to allow for a more sophisticated // implementation as for other executors. This kernel would perform the @@ -91,11 +91,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, - matrix::Dense *x) GKO_NOT_IMPLEMENTED; + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, + matrix::Dense* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL); diff --git a/dpcpp/solver/upper_trs_kernels.dp.cpp b/dpcpp/solver/upper_trs_kernels.dp.cpp index cc1d40f711d..e46e7cb5195 100644 --- a/dpcpp/solver/upper_trs_kernels.dp.cpp +++ b/dpcpp/solver/upper_trs_kernels.dp.cpp @@ -60,14 +60,14 @@ namespace upper_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { do_transpose = false; } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { // This init kernel is here to allow initialization of the solve struct for // a more sophisticated implementation as for other executors. 
@@ -76,8 +76,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { // This generate kernel is here to allow for a more sophisticated // implementation as for other executors. This kernel would perform the @@ -94,11 +94,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, - matrix::Dense *x) GKO_NOT_IMPLEMENTED; + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, + matrix::Dense* x) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL); diff --git a/dpcpp/stop/criterion_kernels.dp.cpp b/dpcpp/stop/criterion_kernels.dp.cpp index 46dc6243ad8..012c726b208 100644 --- a/dpcpp/stop/criterion_kernels.dp.cpp +++ b/dpcpp/stop/criterion_kernels.dp.cpp @@ -52,11 +52,11 @@ namespace set_all_statuses { void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, - Array *stop_status) + Array* stop_status) { auto size = stop_status->get_num_elems(); - stopping_status *__restrict__ stop_status_ptr = stop_status->get_data(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + stopping_status* __restrict__ stop_status_ptr = stop_status->get_data(); + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{size}, [=](sycl::id<1> idx_id) { const auto idx = idx_id[0]; stop_status_ptr[idx].stop(stoppingId, setFinalized); diff --git a/dpcpp/stop/residual_norm_kernels.dp.cpp b/dpcpp/stop/residual_norm_kernels.dp.cpp index a527ec4c564..d6fb04f3f4e 100644 --- 
a/dpcpp/stop/residual_norm_kernels.dp.cpp +++ b/dpcpp/stop/residual_norm_kernels.dp.cpp @@ -58,17 +58,17 @@ namespace residual_norm { template void residual_norm(std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense *orig_tau, + const matrix::Dense* tau, + const matrix::Dense* orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, - bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, + bool* one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); auto device_storage_val = device_storage->get_data(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { device_storage_val[0] = true; device_storage_val[1] = false; @@ -78,7 +78,7 @@ void residual_norm(std::shared_ptr exec, auto orig_tau_val = orig_tau->get_const_values(); auto tau_val = tau->get_const_values(); auto stop_status_val = stop_status->get_data(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { const auto tidx = idx_id[0]; @@ -117,14 +117,14 @@ namespace implicit_residual_norm { template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense> *orig_tau, + const matrix::Dense* tau, + const matrix::Dense>* orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, bool* one_changed) { auto device_storage_val = device_storage->get_data(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + 
exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { device_storage_val[0] = true; device_storage_val[1] = false; @@ -134,7 +134,7 @@ void implicit_residual_norm( auto orig_tau_val = orig_tau->get_const_values(); auto tau_val = tau->get_const_values(); auto stop_status_val = stop_status->get_data(); - exec->get_queue()->submit([&](sycl::handler &cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { const auto tidx = idx_id[0]; diff --git a/dpcpp/test/base/executor.dp.cpp b/dpcpp/test/base/executor.dp.cpp index 93f52a7d1f1..9d0a95fc93d 100644 --- a/dpcpp/test/base/executor.dp.cpp +++ b/dpcpp/test/base/executor.dp.cpp @@ -156,7 +156,7 @@ TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeAccelerator) TEST_F(DpcppExecutor, AllocatesAndFreesMemory) { - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_NO_THROW(ptr = dpcpp->alloc(2)); ASSERT_NO_THROW(dpcpp->free(ptr)); @@ -166,7 +166,7 @@ TEST_F(DpcppExecutor, AllocatesAndFreesMemory) TEST_F(DpcppExecutor, FailsWhenOverallocating) { const gko::size_type num_elems = 1ll << 50; // 4PB of integers - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_THROW( { @@ -179,7 +179,7 @@ TEST_F(DpcppExecutor, FailsWhenOverallocating) } -void check_data(int *data, bool *result) +void check_data(int* data, bool* result) { *result = false; if (data[0] == 3 && data[1] == 8) { @@ -190,15 +190,15 @@ void check_data(int *data, bool *result) TEST_F(DpcppExecutor, CopiesDataToCPU) { int orig[] = {3, 8}; - auto *copy = dpcpp->alloc(2); + auto* copy = dpcpp->alloc(2); gko::Array is_set(ref, 1); dpcpp->copy_from(ref.get(), 2, orig, copy); is_set.set_executor(dpcpp); ASSERT_NO_THROW(dpcpp->synchronize()); - ASSERT_NO_THROW(dpcpp->get_queue()->submit([&](sycl::handler &cgh) { - auto *is_set_ptr = is_set.get_data(); + ASSERT_NO_THROW(dpcpp->get_queue()->submit([&](sycl::handler& cgh) { + auto* is_set_ptr = 
is_set.get_data(); cgh.single_task([=]() { check_data(copy, is_set_ptr); }); })); is_set.set_executor(ref); @@ -207,7 +207,7 @@ TEST_F(DpcppExecutor, CopiesDataToCPU) dpcpp->free(copy); } -void init_data(int *data) +void init_data(int* data) { data[0] = 3; data[1] = 8; @@ -217,7 +217,7 @@ TEST_F(DpcppExecutor, CopiesDataFromCPU) { int copy[2]; auto orig = dpcpp->alloc(2); - dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + dpcpp->get_queue()->submit([&](sycl::handler& cgh) { cgh.single_task([=]() { init_data(orig); }); }); @@ -238,7 +238,7 @@ TEST_F(DpcppExecutor, CopiesDataFromDpcppToDpcpp) int copy[2]; gko::Array is_set(ref, 1); auto orig = dpcpp->alloc(2); - dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + dpcpp->get_queue()->submit([&](sycl::handler& cgh) { cgh.single_task([=]() { init_data(orig); }); }); @@ -246,8 +246,8 @@ TEST_F(DpcppExecutor, CopiesDataFromDpcppToDpcpp) dpcpp2->copy_from(dpcpp.get(), 2, orig, copy_dpcpp2); // Check that the data is really on GPU is_set.set_executor(dpcpp2); - ASSERT_NO_THROW(dpcpp2->get_queue()->submit([&](sycl::handler &cgh) { - auto *is_set_ptr = is_set.get_data(); + ASSERT_NO_THROW(dpcpp2->get_queue()->submit([&](sycl::handler& cgh) { + auto* is_set_ptr = is_set.get_data(); cgh.single_task([=]() { check_data(copy_dpcpp2, is_set_ptr); }); })); is_set.set_executor(ref); @@ -279,7 +279,7 @@ TEST_F(DpcppExecutor, FreeAfterKernel) gko::Array y(dpcpp, length); auto x_val = x.get_data(); auto y_val = y.get_data(); - dpcpp->get_queue()->submit([&](sycl::handler &cgh) { + dpcpp->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{length}, [=](sycl::id<1> i) { y_val[i] += x_val[i]; }); }); diff --git a/dpcpp/test/base/kernel_launch.dp.cpp b/dpcpp/test/base/kernel_launch.dp.cpp index 25a36d3a29a..27d3f1abd12 100644 --- a/dpcpp/test/base/kernel_launch.dp.cpp +++ b/dpcpp/test/base/kernel_launch.dp.cpp @@ -111,7 +111,7 @@ TEST_F(KernelLaunch, Runs1D) exec, [] GKO_KERNEL(auto i, auto d) { 
static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i] = i; }, zero_array.get_num_elems(), zero_array.get_data()); @@ -126,8 +126,8 @@ TEST_F(KernelLaunch, Runs1DArray) exec, [] GKO_KERNEL(auto i, auto d, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i] = i; } else { @@ -146,11 +146,11 @@ TEST_F(KernelLaunch, Runs1DDense) exec, [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); - static_assert( - is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, + "type"); + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; @@ -165,7 +165,7 @@ TEST_F(KernelLaunch, Runs1DDense) d(i / 4, i % 4) = 0; } }, - 16, zero_dense2.get(), static_cast(zero_dense2.get()), + 16, zero_dense2.get(), static_cast(zero_dense2.get()), zero_dense2->get_const_values()); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); @@ -179,7 +179,7 @@ TEST_F(KernelLaunch, Runs2D) [] GKO_KERNEL(auto i, auto j, auto d) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i + 4 * j] = 4 * i + j; }, dim<2>{4, 4}, zero_array.get_data()); @@ -195,8 +195,8 @@ TEST_F(KernelLaunch, Runs2DArray) [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) 
{ d[i + 4 * j] = 4 * i + j; } else { @@ -216,18 +216,18 @@ TEST_F(KernelLaunch, Runs2DDense) [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, auto d4, auto d2_ptr, auto d3_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, + static_assert(is_same::value, + "type"); + static_assert(is_same::value, "type"); - static_assert( - is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && d3.data == d2_ptr && d4 == d3_ptr; @@ -247,7 +247,7 @@ TEST_F(KernelLaunch, Runs2DDense) } }, dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2.get(), - static_cast(zero_dense2.get()), + static_cast(zero_dense2.get()), zero_dense2->get_const_values(), gko::kernels::dpcpp::default_stride(zero_dense.get()), gko::kernels::dpcpp::row_vector(vec_dense.get()), diff --git a/dpcpp/test/components/cooperative_groups_kernels.dp.cpp b/dpcpp/test/components/cooperative_groups_kernels.dp.cpp index 300b6cac8cf..7984d4108e8 100644 --- a/dpcpp/test/components/cooperative_groups_kernels.dp.cpp +++ b/dpcpp/test/components/cooperative_groups_kernels.dp.cpp @@ -118,7 +118,7 @@ class CooperativeGroups : public testing::TestWithParam { // kernel implementation template __WG_BOUND__(KCfg::decode<0>(config)) -void cg_shuffle(bool *s, sycl::nd_item<3> item_ct1) +void cg_shuffle(bool* s, sycl::nd_item<3> item_ct1) { constexpr auto sg_size = KCfg::decode<1>(config); auto group = @@ -135,10 +135,10 @@ void cg_shuffle(bool *s, sycl::nd_item<3> item_ct1) // group all kernel things together template void cg_shuffle_host(dim3 grid, dim3 block, - gko::size_type 
dynamic_shared_memory, sycl::queue *queue, - bool *s) + gko::size_type dynamic_shared_memory, sycl::queue* queue, + bool* s) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { cg_shuffle(s, item_ct1); @@ -152,7 +152,7 @@ GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_shuffle_config, cg_shuffle_host) // the call void cg_shuffle_config_call(std::uint32_t desired_cfg, dim3 grid, dim3 block, gko::size_type dynamic_shared_memory, - sycl::queue *queue, bool *s) + sycl::queue* queue, bool* s) { cg_shuffle_config( default_config_list, @@ -171,7 +171,7 @@ TEST_P(CooperativeGroups, Shuffle) template __WG_BOUND__(KCfg::decode<0>(config)) -void cg_all(bool *s, sycl::nd_item<3> item_ct1) +void cg_all(bool* s, sycl::nd_item<3> item_ct1) { constexpr auto sg_size = KCfg::decode<1>(config); auto group = @@ -188,12 +188,12 @@ GKO_ENABLE_DEFAULT_HOST_CONFIG(cg_all, cg_all) GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_all, cg_all) GKO_ENABLE_DEFAULT_CONFIG_CALL(cg_all_call, cg_all, default_config_list) -TEST_P(CooperativeGroups, All) { test_all_subgroup(cg_all_call); } +TEST_P(CooperativeGroups, All) { test_all_subgroup(cg_all_call); } template __WG_BOUND__(KCfg::decode<0>(config)) -void cg_any(bool *s, sycl::nd_item<3> item_ct1) +void cg_any(bool* s, sycl::nd_item<3> item_ct1) { constexpr auto sg_size = KCfg::decode<1>(config); auto group = group::tiled_partition(config)>( @@ -209,12 +209,12 @@ GKO_ENABLE_DEFAULT_HOST_CONFIG(cg_any, cg_any) GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_any, cg_any) GKO_ENABLE_DEFAULT_CONFIG_CALL(cg_any_call, cg_any, default_config_list) -TEST_P(CooperativeGroups, Any) { test_all_subgroup(cg_any_call); } +TEST_P(CooperativeGroups, Any) { test_all_subgroup(cg_any_call); } template __WG_BOUND__(KCfg::decode<0>(config)) -void cg_ballot(bool *s, sycl::nd_item<3> item_ct1) +void cg_ballot(bool* s, sycl::nd_item<3> item_ct1) { constexpr auto 
sg_size = KCfg::decode<1>(config); auto group = @@ -231,7 +231,7 @@ GKO_ENABLE_DEFAULT_HOST_CONFIG(cg_ballot, cg_ballot) GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(cg_ballot, cg_ballot) GKO_ENABLE_DEFAULT_CONFIG_CALL(cg_ballot_call, cg_ballot, default_config_list) -TEST_P(CooperativeGroups, Ballot) { test_all_subgroup(cg_ballot_call); } +TEST_P(CooperativeGroups, Ballot) { test_all_subgroup(cg_ballot_call); } INSTANTIATE_TEST_SUITE_P(DifferentSubgroup, CooperativeGroups, diff --git a/dpcpp/test/matrix/dense_kernels.cpp b/dpcpp/test/matrix/dense_kernels.cpp index cfecc3d3a03..c5bd49b6a97 100644 --- a/dpcpp/test/matrix/dense_kernels.cpp +++ b/dpcpp/test/matrix/dense_kernels.cpp @@ -141,7 +141,7 @@ class Dense : public ::testing::Test { std::shuffle(tmp2.begin(), tmp2.end(), rng); std::vector tmp3(x->get_size()[0] / 10); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); - for (auto &i : tmp3) { + for (auto& i : tmp3) { i = row_dist(rng); } rpermute_idxs = @@ -153,7 +153,7 @@ class Dense : public ::testing::Test { } template - std::unique_ptr convert(InputType &&input) + std::unique_ptr convert(InputType&& input) { auto result = ConvertedType::create(input->get_executor()); input->convert_to(result.get()); @@ -397,8 +397,8 @@ TEST_F(Dense, IsTransposable) auto trans = x->transpose(); auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } @@ -409,8 +409,8 @@ TEST_F(Dense, IsConjugateTransposable) auto trans = c_x->conj_transpose(); auto dtrans = dc_x->conj_transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } diff --git a/dpcpp/test/matrix/diagonal_kernels.cpp b/dpcpp/test/matrix/diagonal_kernels.cpp index a1000ef21a8..baceca2dd52 100644 --- a/dpcpp/test/matrix/diagonal_kernels.cpp +++ 
b/dpcpp/test/matrix/diagonal_kernels.cpp @@ -240,9 +240,9 @@ TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) set_up_complex_data(); auto trans = cdiag->conj_transpose(); - auto trans_diag = static_cast(trans.get()); + auto trans_diag = static_cast(trans.get()); auto dtrans = dcdiag->conj_transpose(); - auto dtrans_diag = static_cast(dtrans.get()); + auto dtrans_diag = static_cast(dtrans.get()); GKO_ASSERT_MTX_NEAR(trans_diag, dtrans_diag, 0); } diff --git a/dpcpp/test/solver/cb_gmres_kernels.cpp b/dpcpp/test/solver/cb_gmres_kernels.cpp index 295b793873e..88cb782cc77 100644 --- a/dpcpp/test/solver/cb_gmres_kernels.cpp +++ b/dpcpp/test/solver/cb_gmres_kernels.cpp @@ -104,7 +104,7 @@ class CbGmres : public ::testing::Test { Range3dHelper generate_krylov_helper(gko::dim<3> size) { auto helper = Range3dHelper{ref, size}; - auto &bases = helper.get_bases(); + auto& bases = helper.get_bases(); const auto num_rows = size[0] * size[1]; const auto num_cols = size[2]; auto temp_krylov_bases = gko::test::generate_random_matrix( @@ -201,7 +201,7 @@ class CbGmres : public ::testing::Test { void assert_krylov_bases_near() { gko::Array d_to_host{ref}; - auto &krylov_bases = range_helper.get_bases(); + auto& krylov_bases = range_helper.get_bases(); d_to_host = d_range_helper.get_bases(); const auto tolerance = r::value; using sycl::abs; diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index 4e2be428fd1..209772d851b 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp index ffb52579132..4a3cd43f967 100644 --- a/examples/cb-gmres/cb-gmres.cpp +++ b/examples/cb-gmres/cb-gmres.cpp @@ -45,8 +45,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // To get an accurate result, the solve is repeated multiple times (while // ensuring the initial guess is always the same). The result of the solve will // be written to x. -double measure_solve_time_in_s(const gko::Executor *exec, gko::LinOp *solver, - const gko::LinOp *b, gko::LinOp *x) +double measure_solve_time_in_s(const gko::Executor* exec, gko::LinOp* solver, + const gko::LinOp* b, gko::LinOp* x) { constexpr int repeats{5}; double duration{0}; @@ -73,7 +73,7 @@ double measure_solve_time_in_s(const gko::Executor *exec, gko::LinOp *solver, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense // with one column/one row. The advantage of this concept is that using diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index 39fcb440eb1..20646a500c6 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -54,7 +54,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Utility function which returns the first element (position [0, 0]) from a // given gko::matrix::Dense matrix / vector. template -ValueType get_first_element(const gko::matrix::Dense *mtx) +ValueType get_first_element(const gko::matrix::Dense* mtx) { // Copy the matrix / vector to the host device before accessing the value in // case it is stored in a GPU. @@ -66,7 +66,7 @@ ValueType get_first_element(const gko::matrix::Dense *mtx) // vector. 
template gko::remove_complex compute_norm( - const gko::matrix::Dense *b) + const gko::matrix::Dense* b) { // Get the executor of the vector auto exec = b->get_executor(); @@ -122,11 +122,11 @@ struct ResidualLogger : gko::log::Logger { using gko_real_dense = gko::matrix::Dense; // This overload is necessary to avoid interface breaks for Ginkgo 2.0 - void on_iteration_complete(const gko::LinOp *solver, - const gko::size_type &iteration, - const gko::LinOp *residual, - const gko::LinOp *solution, - const gko::LinOp *residual_norm) const override + void on_iteration_complete(const gko::LinOp* solver, + const gko::size_type& iteration, + const gko::LinOp* residual, + const gko::LinOp* solution, + const gko::LinOp* residual_norm) const override { this->on_iteration_complete(solver, iteration, residual, solution, residual_norm, nullptr); @@ -135,10 +135,10 @@ struct ResidualLogger : gko::log::Logger { // Customize the logging hook which is called everytime an iteration is // completed void on_iteration_complete( - const gko::LinOp *, const gko::size_type &iteration, - const gko::LinOp *residual, const gko::LinOp *solution, - const gko::LinOp *residual_norm, - const gko::LinOp *implicit_sq_residual_norm) const override + const gko::LinOp*, const gko::size_type& iteration, + const gko::LinOp* residual, const gko::LinOp* solution, + const gko::LinOp* residual_norm, + const gko::LinOp* implicit_sq_residual_norm) const override { // If the solver shares a residual norm, log its value if (residual_norm) { @@ -196,7 +196,7 @@ struct ResidualLogger : gko::log::Logger { // Construct the logger and store the system matrix and b vectors ResidualLogger(std::shared_ptr exec, - const gko::LinOp *matrix, const gko_dense *b) + const gko::LinOp* matrix, const gko_dense* b) : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), matrix{matrix}, b{b} @@ -204,9 +204,9 @@ struct ResidualLogger : gko::log::Logger { private: // Pointer to the system matrix - const gko::LinOp 
*matrix; + const gko::LinOp* matrix; // Pointer to the right hand sides - const gko_dense *b; + const gko_dense* b; // Vector which stores all the recurrent residual norms mutable std::vector recurrent_norms{}; // Vector which stores all the real residual norms @@ -218,7 +218,7 @@ struct ResidualLogger : gko::log::Logger { }; -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use some shortcuts. In Ginkgo, vectors are seen as a // gko::matrix::Dense with one column/one row. The advantage of this diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp index cfad1b3bedd..d5e144d082e 100644 --- a/examples/custom-matrix-format/custom-matrix-format.cpp +++ b/examples/custom-matrix-format/custom-matrix-format.cpp @@ -43,8 +43,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // CUDA executor. Unfortunately, NVCC has serious problems interpreting some // parts of Ginkgo's code, so the kernel has to be compiled separately. template -void stencil_kernel(std::size_t size, const ValueType *coefs, - const ValueType *b, ValueType *x); +void stencil_kernel(std::size_t size, const ValueType* coefs, + const ValueType* b, ValueType* x); // A stencil matrix class representing the 3pt stencil linear operator. @@ -83,7 +83,7 @@ class StencilMatrix : public gko::EnableLinOp>, // For simplicity, we assume that there is always only one right hand side // and the stride of consecutive elements in the vectors is 1 (both of these // are always true in this example). - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { // we only implement the operator for dense RHS. // gko::as will throw an exception if its argument is not Dense. 
@@ -93,8 +93,8 @@ class StencilMatrix : public gko::EnableLinOp>, // we need separate implementations depending on the executor, so we // create an operation which maps the call to the correct implementation struct stencil_operation : gko::Operation { - stencil_operation(const coef_type &coefficients, const vec *b, - vec *x) + stencil_operation(const coef_type& coefficients, const vec* b, + vec* x) : coefficients{coefficients}, b{b}, x{x} {} @@ -128,9 +128,9 @@ class StencilMatrix : public gko::EnableLinOp>, // If not provided, Ginkgo will use the implementation for the // OpenMP executor when calling it in the reference executor. - const coef_type &coefficients; - const vec *b; - vec *x; + const coef_type& coefficients; + const vec* b; + vec* x; }; this->get_executor()->run( stencil_operation(coefficients, dense_b, dense_x)); @@ -140,8 +140,8 @@ class StencilMatrix : public gko::EnableLinOp>, // x = alpha * A * b + beta * x. This function is commonly used and can // often be better optimized than implementing it using x = A * b. However, // for simplicity, we will implement it exactly like that in this example. - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override { auto dense_b = gko::as(b); auto dense_x = gko::as(x); @@ -159,7 +159,7 @@ class StencilMatrix : public gko::EnableLinOp>, // Creates a stencil matrix in CSR format for the given number of discretization // points. template -void generate_stencil_matrix(gko::matrix::Csr *matrix) +void generate_stencil_matrix(gko::matrix::Csr* matrix) { const auto discretization_points = matrix->get_size()[0]; auto row_ptrs = matrix->get_row_ptrs(); @@ -184,7 +184,7 @@ void generate_stencil_matrix(gko::matrix::Csr *matrix) // Generates the RHS vector given `f` and the boundary conditions. 
template void generate_rhs(Closure f, ValueType u0, ValueType u1, - gko::matrix::Dense *rhs) + gko::matrix::Dense* rhs) { const auto discretization_points = rhs->get_size()[0]; auto values = rhs->get_values(); @@ -201,7 +201,7 @@ void generate_rhs(Closure f, ValueType u0, ValueType u1, // Prints the solution `u`. template void print_solution(ValueType u0, ValueType u1, - const gko::matrix::Dense *u) + const gko::matrix::Dense* u) { std::cout << u0 << '\n'; for (int i = 0; i < u->get_size()[0]; ++i) { @@ -215,7 +215,7 @@ void print_solution(ValueType u0, ValueType u1, // solution function `correct_u`. template double calculate_error(int discretization_points, - const gko::matrix::Dense *u, + const gko::matrix::Dense* u, Closure correct_u) { const auto h = 1.0 / (discretization_points + 1); @@ -230,7 +230,7 @@ double calculate_error(int discretization_points, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/custom-matrix-format/stencil_kernel.cu b/examples/custom-matrix-format/stencil_kernel.cu index 47cd2540def..40c2de717b7 100644 --- a/examples/custom-matrix-format/stencil_kernel.cu +++ b/examples/custom-matrix-format/stencil_kernel.cu @@ -41,8 +41,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define STENCIL_KERNEL(_type) \ - void stencil_kernel(std::size_t size, const _type *coefs, const _type *b, \ - _type *x); + void stencil_kernel(std::size_t size, const _type* coefs, const _type* b, \ + _type* x); namespace { @@ -50,8 +50,8 @@ namespace { // a parallel CUDA kernel that computes the application of a 3 point stencil template -__global__ void stencil_kernel_impl(std::size_t size, const ValueType *coefs, - const ValueType *b, ValueType *x) +__global__ void stencil_kernel_impl(std::size_t size, const ValueType* coefs, + const ValueType* b, ValueType* x) { const auto thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id >= size) { @@ -72,8 +72,8 @@ __global__ void stencil_kernel_impl(std::size_t size, const ValueType *coefs, template -void stencil_kernel(std::size_t size, const ValueType *coefs, - const ValueType *b, ValueType *x) +void stencil_kernel(std::size_t size, const ValueType* coefs, + const ValueType* b, ValueType* x) { constexpr int block_size = 512; const auto grid_size = (size + block_size - 1) / block_size; diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index 7c5074009bc..bf32e44fbcf 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -65,8 +65,8 @@ class ByInteraction protected: bool check_impl(gko::uint8 stoppingId, bool setFinalized, - gko::Array *stop_status, - bool *one_changed, const Criterion::Updater &) override + gko::Array* stop_status, + bool* one_changed, const Criterion::Updater&) override { bool result = *(parameters_.stop_iteration_process); if (result) { @@ -80,8 +80,8 @@ class ByInteraction : EnablePolymorphicObject(std::move(exec)) {} - explicit ByInteraction(const Factory *factory, - const gko::stop::CriterionArgs &args) + explicit ByInteraction(const Factory* factory, + const gko::stop::CriterionArgs& args) : 
EnablePolymorphicObject( factory->get_executor()), @@ -90,7 +90,7 @@ class ByInteraction }; -void run_solver(volatile bool *stop_iteration_process, +void run_solver(volatile bool* stop_iteration_process, std::shared_ptr exec) { // Some shortcuts @@ -138,7 +138,7 @@ void run_solver(volatile bool *stop_iteration_process, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Print version information std::cout << gko::version_info::get() << std::endl; diff --git a/examples/external-lib-interfacing/external-lib-interfacing.cpp b/examples/external-lib-interfacing/external-lib-interfacing.cpp index 122b73b155b..490b6af1ab8 100644 --- a/examples/external-lib-interfacing/external-lib-interfacing.cpp +++ b/examples/external-lib-interfacing/external-lib-interfacing.cpp @@ -136,8 +136,8 @@ class AdvectionProblem { // contribution. With this information, the following should be // relatively self-explanatory: struct AssemblyScratchData { - AssemblyScratchData(const FiniteElement &fe); - AssemblyScratchData(const AssemblyScratchData &scratch_data); + AssemblyScratchData(const FiniteElement& fe); + AssemblyScratchData(const AssemblyScratchData& scratch_data); FEValues fe_values; FEFaceValues fe_face_values; @@ -151,9 +151,9 @@ class AdvectionProblem { void assemble_system(); void local_assemble_system( - const typename DoFHandler::active_cell_iterator &cell, - AssemblyScratchData &scratch, AssemblyCopyData ©_data); - void copy_local_to_global(const AssemblyCopyData ©_data); + const typename DoFHandler::active_cell_iterator& cell, + AssemblyScratchData& scratch, AssemblyCopyData& copy_data); + void copy_local_to_global(const AssemblyCopyData& copy_data); // The following functions again are as in previous examples, as are the @@ -204,10 +204,10 @@ class AdvectionField : public TensorFunction<1, dim> { public: AdvectionField() : TensorFunction<1, dim>() {} - virtual Tensor<1, dim> value(const Point &p) const; + virtual Tensor<1, dim> value(const Point& p) 
const; - virtual void value_list(const std::vector> &points, - std::vector> &values) const; + virtual void value_list(const std::vector>& points, + std::vector>& values) const; // In previous examples, we have used assertions that throw exceptions in // several places. However, we have never seen how such exceptions are @@ -250,7 +250,7 @@ class AdvectionField : public TensorFunction<1, dim> { // arrays, incompatible parameters to functions and the like; using // assertion as in this case can eliminate many of these problems. template -Tensor<1, dim> AdvectionField::value(const Point &p) const +Tensor<1, dim> AdvectionField::value(const Point& p) const { Point value; value[0] = 2; @@ -262,8 +262,8 @@ Tensor<1, dim> AdvectionField::value(const Point &p) const template -void AdvectionField::value_list(const std::vector> &points, - std::vector> &values) const +void AdvectionField::value_list(const std::vector>& points, + std::vector>& values) const { Assert(values.size() == points.size(), ExcDimensionMismatch(values.size(), points.size())); @@ -288,11 +288,11 @@ class RightHandSide : public Function { public: RightHandSide() : Function() {} - virtual double value(const Point &p, + virtual double value(const Point& p, const unsigned int component = 0) const; - virtual void value_list(const std::vector> &points, - std::vector &values, + virtual void value_list(const std::vector>& points, + std::vector& values, const unsigned int component = 0) const; private: @@ -321,7 +321,7 @@ const Point<3> RightHandSide<3>::center_point = Point<3>(-0.75, -0.75, -0.75); // one past the last (i.e. 
again the half-open interval so often used in the // C++ standard library): template -double RightHandSide::value(const Point &p, +double RightHandSide::value(const Point& p, const unsigned int component) const { (void)component; @@ -334,8 +334,8 @@ double RightHandSide::value(const Point &p, template -void RightHandSide::value_list(const std::vector> &points, - std::vector &values, +void RightHandSide::value_list(const std::vector>& points, + std::vector& values, const unsigned int component) const { Assert(values.size() == points.size(), @@ -353,17 +353,17 @@ class BoundaryValues : public Function { public: BoundaryValues() : Function() {} - virtual double value(const Point &p, + virtual double value(const Point& p, const unsigned int component = 0) const; - virtual void value_list(const std::vector> &points, - std::vector &values, + virtual void value_list(const std::vector>& points, + std::vector& values, const unsigned int component = 0) const; }; template -double BoundaryValues::value(const Point &p, +double BoundaryValues::value(const Point& p, const unsigned int component) const { (void)component; @@ -377,8 +377,8 @@ double BoundaryValues::value(const Point &p, template -void BoundaryValues::value_list(const std::vector> &points, - std::vector &values, +void BoundaryValues::value_list(const std::vector>& points, + std::vector& values, const unsigned int component) const { Assert(values.size() == points.size(), @@ -460,9 +460,9 @@ void BoundaryValues::value_list(const std::vector> &points, class GradientEstimation { public: template - static void estimate(const DoFHandler &dof, - const Vector &solution, - Vector &error_per_cell); + static void estimate(const DoFHandler& dof, + const Vector& solution, + Vector& error_per_cell); DeclException2(ExcInvalidVectorLength, int, int, << "Vector has length " << arg1 << ", but should have " @@ -472,23 +472,23 @@ class GradientEstimation { private: template struct EstimateScratchData { - EstimateScratchData(const 
FiniteElement &fe, - const Vector &solution, - Vector &error_per_cell); - EstimateScratchData(const EstimateScratchData &data); + EstimateScratchData(const FiniteElement& fe, + const Vector& solution, + Vector& error_per_cell); + EstimateScratchData(const EstimateScratchData& data); FEValues fe_midpoint_value; - const Vector &solution; - Vector &error_per_cell; + const Vector& solution; + Vector& error_per_cell; }; struct EstimateCopyData {}; template static void estimate_cell( - const typename DoFHandler::active_cell_iterator &cell, - EstimateScratchData &scratch_data, - const EstimateCopyData ©_data); + const typename DoFHandler::active_cell_iterator& cell, + EstimateScratchData& scratch_data, + const EstimateCopyData& copy_data); }; @@ -582,7 +582,7 @@ void AdvectionProblem::assemble_system() // class: template AdvectionProblem::AssemblyScratchData::AssemblyScratchData( - const FiniteElement &fe) + const FiniteElement& fe) : fe_values(fe, QGauss(2), update_values | update_gradients | update_quadrature_points | update_JxW_values), @@ -594,7 +594,7 @@ AdvectionProblem::AssemblyScratchData::AssemblyScratchData( template AdvectionProblem::AssemblyScratchData::AssemblyScratchData( - const AssemblyScratchData &scratch_data) + const AssemblyScratchData& scratch_data) : fe_values(scratch_data.fe_values.get_fe(), scratch_data.fe_values.get_quadrature(), update_values | update_gradients | update_quadrature_points | @@ -641,8 +641,8 @@ AdvectionProblem::AssemblyScratchData::AssemblyScratchData( // an exercise. template void AdvectionProblem::local_assemble_system( - const typename DoFHandler::active_cell_iterator &cell, - AssemblyScratchData &scratch_data, AssemblyCopyData ©_data) + const typename DoFHandler::active_cell_iterator& cell, + AssemblyScratchData& scratch_data, AssemblyCopyData& copy_data) { // First of all, we will need some objects that describe boundary values, // right hand side function and the advection field. 
As we will only @@ -798,7 +798,7 @@ void AdvectionProblem::local_assemble_system( // cell. The following should therefore be pretty obvious: template void AdvectionProblem::copy_local_to_global( - const AssemblyCopyData ©_data) + const AssemblyCopyData& copy_data) { for (unsigned int i = 0; i < copy_data.local_dof_indices.size(); ++i) { for (unsigned int j = 0; j < copy_data.local_dof_indices.size(); ++j) @@ -846,9 +846,9 @@ void AdvectionProblem::solve() auto x = vec::create(exec, gko::dim<2>(num_rows, 1)); auto A = mtx::create(exec, gko::dim<2>(num_rows), system_matrix.n_nonzero_elements()); - mtx::value_type *values = A->get_values(); - mtx::index_type *row_ptr = A->get_row_ptrs(); - mtx::index_type *col_idx = A->get_col_idxs(); + mtx::value_type* values = A->get_values(); + mtx::index_type* row_ptr = A->get_row_ptrs(); + mtx::index_type* col_idx = A->get_col_idxs(); // Convert to standard CSR format // As deal.ii does not expose its system matrix pointers, we construct them @@ -992,8 +992,8 @@ void AdvectionProblem::run() // estimate_cell() function: template GradientEstimation::EstimateScratchData::EstimateScratchData( - const FiniteElement &fe, const Vector &solution, - Vector &error_per_cell) + const FiniteElement& fe, const Vector& solution, + Vector& error_per_cell) : fe_midpoint_value(fe, QMidpoint(), update_values | update_quadrature_points), solution(solution), @@ -1003,7 +1003,7 @@ GradientEstimation::EstimateScratchData::EstimateScratchData( template GradientEstimation::EstimateScratchData::EstimateScratchData( - const EstimateScratchData &scratch_data) + const EstimateScratchData& scratch_data) : fe_midpoint_value(scratch_data.fe_midpoint_value.get_fe(), scratch_data.fe_midpoint_value.get_quadrature(), update_values | update_quadrature_points), @@ -1024,9 +1024,9 @@ GradientEstimation::EstimateScratchData::EstimateScratchData( // data somewhere in memory, or non-reproducible results), it is // well worth the effort to check for such things. 
template -void GradientEstimation::estimate(const DoFHandler &dof_handler, - const Vector &solution, - Vector &error_per_cell) +void GradientEstimation::estimate(const DoFHandler& dof_handler, + const Vector& solution, + Vector& error_per_cell) { Assert(error_per_cell.size() == dof_handler.get_triangulation().n_active_cells(), @@ -1036,7 +1036,7 @@ void GradientEstimation::estimate(const DoFHandler &dof_handler, WorkStream::run(dof_handler.begin_active(), dof_handler.end(), &GradientEstimation::template estimate_cell, - std::function(), + std::function(), EstimateScratchData(dof_handler.get_fe(), solution, error_per_cell), EstimateCopyData()); @@ -1092,8 +1092,8 @@ void GradientEstimation::estimate(const DoFHandler &dof_handler, // Now for the details: template void GradientEstimation::estimate_cell( - const typename DoFHandler::active_cell_iterator &cell, - EstimateScratchData &scratch_data, const EstimateCopyData &) + const typename DoFHandler::active_cell_iterator& cell, + EstimateScratchData& scratch_data, const EstimateCopyData&) { // We need space for the tensor Y, which is the sum of // outer products of the y-vectors. @@ -1332,7 +1332,7 @@ int main() Step9::AdvectionProblem<2> advection_problem_2d; advection_problem_2d.run(); - } catch (std::exception &exc) { + } catch (std::exception& exc) { std::cerr << std::endl << std::endl << "----------------------------------------------------" diff --git a/examples/ginkgo-overhead/ginkgo-overhead.cpp b/examples/ginkgo-overhead/ginkgo-overhead.cpp index ba41b01acf4..63c1770033a 100644 --- a/examples/ginkgo-overhead/ginkgo-overhead.cpp +++ b/examples/ginkgo-overhead/ginkgo-overhead.cpp @@ -38,14 +38,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -[[noreturn]] void print_usage_and_exit(const char *name) +[[noreturn]] void print_usage_and_exit(const char* name) { std::cerr << "Usage: " << name << " [NUM_ITERS]" << std::endl; std::exit(-1); } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { using ValueType = double; using IndexType = int; diff --git a/examples/ginkgo-ranges/ginkgo-ranges.cpp b/examples/ginkgo-ranges/ginkgo-ranges.cpp index 0853e74f3fc..d0a389542ee 100644 --- a/examples/ginkgo-ranges/ginkgo-ranges.cpp +++ b/examples/ginkgo-ranges/ginkgo-ranges.cpp @@ -38,7 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // LU factorization implementation using Ginkgo ranges // For simplicity, we only consider square matrices, and no pivoting. template -void factorize(const gko::range &A) +void factorize(const gko::range& A) // note: const means that the range (i.e. the data handler) is constant, // not that the underlying data is constant! { @@ -59,7 +59,7 @@ void factorize(const gko::range &A) // a utility function for printing the factorization on screen template -void print_lu(const gko::range &A) +void print_lu(const gko::range& A) { std::cout << std::setprecision(2) << std::fixed; std::cout << "L = ["; @@ -80,7 +80,7 @@ void print_lu(const gko::range &A) } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { using ValueType = double; using IndexType = int; diff --git a/examples/heat-equation/heat-equation.cpp b/examples/heat-equation/heat-equation.cpp index 60ca7f53eb3..5628f3e6c53 100644 --- a/examples/heat-equation/heat-equation.cpp +++ b/examples/heat-equation/heat-equation.cpp @@ -78,7 +78,7 @@ setting. // This function implements a simple Ginkgo-themed clamped color mapping for // values in the range [0,5]. -void set_val(unsigned char *data, double value) +void set_val(unsigned char* data, double value) { // RGB values for the 6 colors used for values 0, 1, ..., 5 // We will interpolate linearly between these values. 
@@ -108,8 +108,8 @@ std::pair build_output(int n, double fps) // Write the current frame to video output using the above color mapping -void output_timestep(std::pair &output, int n, - const double *data) +void output_timestep(std::pair& output, int n, + const double* data) { for (int i = 0; i < n; i++) { auto row = output.second.ptr(i); @@ -121,7 +121,7 @@ void output_timestep(std::pair &output, int n, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { using mtx = gko::matrix::Csr<>; using vec = gko::matrix::Dense<>; diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp index 1b9e7b5f53b..904b8426ee5 100644 --- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp +++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp index 4025c743ba1..c94afc2c082 100644 --- a/examples/inverse-iteration/inverse-iteration.cpp +++ b/examples/inverse-iteration/inverse-iteration.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using precision = std::complex; diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index 495ad7e0ee8..77a91ca3b45 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index 4e3ad030e31..fa746358db7 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ b/examples/iterative-refinement/iterative-refinement.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp index 5735508c43f..512a83f9641 100644 --- a/examples/mixed-precision-ir/mixed-precision-ir.cpp +++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp @@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp index 64eff3aa96c..7d5ba73fc5e 100644 --- a/examples/mixed-spmv/mixed-spmv.cpp +++ b/examples/mixed-spmv/mixed-spmv.cpp @@ -65,7 +65,7 @@ namespace { */ template typename std::enable_if::value, ValueType>::type -get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +get_rand_value(ValueDistribution&& value_dist, Engine&& gen) { return value_dist(gen); } @@ -77,7 +77,7 @@ get_rand_value(ValueDistribution &&value_dist, Engine &&gen) */ template typename std::enable_if::value, ValueType>::type -get_rand_value(ValueDistribution &&value_dist, Engine &&gen) +get_rand_value(ValueDistribution&& value_dist, Engine&& gen) { return ValueType(value_dist(gen), value_dist(gen)); } @@ -125,7 +125,7 @@ double timing(std::shared_ptr exec, } // namespace -int main(int argc, char *argv[]) +int main(int argc, char* 
argv[]) { // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense // with one column/one row. The advantage of this concept is that using diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index cf3d360c11f..04f833242d7 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -92,9 +92,9 @@ constexpr double default_gamma = -1.0 / 6.0; // Creates a stencil matrix in CSR format for the given number of discretization // points. template -void generate_stencil_matrix(IndexType dp, IndexType *row_ptrs, - IndexType *col_idxs, ValueType *values, - ValueType *coefs) +void generate_stencil_matrix(IndexType dp, IndexType* row_ptrs, + IndexType* col_idxs, ValueType* values, + ValueType* coefs) { IndexType pos = 0; const size_t dp_2 = dp * dp; @@ -122,8 +122,8 @@ void generate_stencil_matrix(IndexType dp, IndexType *row_ptrs, // Generates the RHS vector given `f` and the boundary conditions. template -void generate_rhs(IndexType dp, Closure f, ClosureT u, ValueType *rhs, - ValueType *coefs) +void generate_rhs(IndexType dp, Closure f, ClosureT u, ValueType* rhs, + ValueType* coefs) { const size_t dp_2 = dp * dp; const ValueType h = 1.0 / (dp + 1.0); @@ -175,7 +175,7 @@ void generate_rhs(IndexType dp, Closure f, ClosureT u, ValueType *rhs, // Prints the solution `u`. template -void print_solution(IndexType dp, const ValueType *u) +void print_solution(IndexType dp, const ValueType* u) { for (IndexType i = 0; i < dp; ++i) { for (IndexType j = 0; j < dp; ++j) { @@ -190,7 +190,7 @@ void print_solution(IndexType dp, const ValueType *u) // Computes the 1-norm of the error given the computed `u` and the correct // solution function `correct_u`. 
template -gko::remove_complex calculate_error(IndexType dp, const ValueType *u, +gko::remove_complex calculate_error(IndexType dp, const ValueType* u, Closure correct_u) { const ValueType h = 1.0 / (dp + 1); @@ -209,10 +209,10 @@ gko::remove_complex calculate_error(IndexType dp, const ValueType *u, template -void solve_system(const std::string &executor_string, - unsigned int discretization_points, IndexType *row_ptrs, - IndexType *col_idxs, ValueType *values, ValueType *rhs, - ValueType *u, gko::remove_complex reduction_factor) +void solve_system(const std::string& executor_string, + unsigned int discretization_points, IndexType* row_ptrs, + IndexType* col_idxs, ValueType* values, ValueType* rhs, + ValueType* u, gko::remove_complex reduction_factor) { // Some shortcuts using vec = gko::matrix::Dense; @@ -221,7 +221,7 @@ void solve_system(const std::string &executor_string, using bj = gko::preconditioner::Jacobi; using val_array = gko::Array; using idx_array = gko::Array; - const auto &dp = discretization_points; + const auto& dp = discretization_points; const gko::size_type dp_2 = dp * dp; // Figure out where to run the code @@ -297,7 +297,7 @@ void solve_system(const std::string &executor_string, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { using ValueType = double; using IndexType = int; diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index 6aff2772980..4e5cae6d984 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -44,7 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { -void papi_add_event(const std::string &event_name, int &eventset) +void papi_add_event(const std::string& event_name, int& eventset) { int code; int ret_val = PAPI_event_name_to_code(event_name.c_str(), &code); @@ -62,7 +62,7 @@ void papi_add_event(const std::string &event_name, int &eventset) template -std::string to_string(T *ptr) +std::string to_string(T* ptr) { std::ostringstream os; os << reinterpret_cast(ptr); @@ -124,7 +124,7 @@ void print_papi_counters(int eventset) } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/par-ilu-convergence/par-ilu-convergence.cpp b/examples/par-ilu-convergence/par-ilu-convergence.cpp index b72ea5e667a..c1acc761838 100644 --- a/examples/par-ilu-convergence/par-ilu-convergence.cpp +++ b/examples/par-ilu-convergence/par-ilu-convergence.cpp @@ -65,7 +65,7 @@ auto try_generate(Function fun) -> decltype(fun()) decltype(fun()) result; try { result = fun(); - } catch (const gko::Error &err) { + } catch (const gko::Error& err) { std::cerr << "Error: " << err.what() << '\n'; std::exit(-1); } @@ -75,8 +75,8 @@ auto try_generate(Function fun) -> decltype(fun()) template double compute_ilu_residual_norm( - const gko::matrix::Csr *residual, - const gko::matrix::Csr *mtx) + const gko::matrix::Csr* residual, + const gko::matrix::Csr* mtx) { gko::matrix_data residual_data; gko::matrix_data mtx_data; @@ -98,7 +98,7 @@ double compute_ilu_residual_norm( } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { using ValueType = double; using IndexType = int; diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index 902062c06fd..34f189363ce 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -74,14 +74,14 @@ std::unique_ptr> create_vector( // utilities for computing norms and 
residuals template -ValueType get_first_element(const vec *norm) +ValueType get_first_element(const vec* norm) { return norm->get_executor()->copy_val_to_host(norm->get_const_values()); } template -gko::remove_complex compute_norm(const vec *b) +gko::remove_complex compute_norm(const vec* b) { auto exec = b->get_executor(); auto b_norm = gko::initialize>({0.0}, exec); @@ -92,8 +92,8 @@ gko::remove_complex compute_norm(const vec *b) template gko::remove_complex compute_residual_norm( - const gko::LinOp *system_matrix, const vec *b, - const vec *x) + const gko::LinOp* system_matrix, const vec* b, + const vec* x) { auto exec = system_matrix->get_executor(); auto one = gko::initialize>({1.0}, exec); @@ -116,62 +116,62 @@ namespace loggers { // taken before and after. This can create significant overhead since to ensure // proper timings, calls to `synchronize` are required. struct OperationLogger : gko::log::Logger { - void on_allocation_started(const gko::Executor *exec, - const gko::size_type &) const override + void on_allocation_started(const gko::Executor* exec, + const gko::size_type&) const override { this->start_operation(exec, "allocate"); } - void on_allocation_completed(const gko::Executor *exec, - const gko::size_type &, - const gko::uintptr &) const override + void on_allocation_completed(const gko::Executor* exec, + const gko::size_type&, + const gko::uintptr&) const override { this->end_operation(exec, "allocate"); } - void on_free_started(const gko::Executor *exec, - const gko::uintptr &) const override + void on_free_started(const gko::Executor* exec, + const gko::uintptr&) const override { this->start_operation(exec, "free"); } - void on_free_completed(const gko::Executor *exec, - const gko::uintptr &) const override + void on_free_completed(const gko::Executor* exec, + const gko::uintptr&) const override { this->end_operation(exec, "free"); } - void on_copy_started(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const 
gko::uintptr &, - const gko::size_type &) const override + void on_copy_started(const gko::Executor* from, const gko::Executor* to, + const gko::uintptr&, const gko::uintptr&, + const gko::size_type&) const override { from->synchronize(); this->start_operation(to, "copy"); } - void on_copy_completed(const gko::Executor *from, const gko::Executor *to, - const gko::uintptr &, const gko::uintptr &, - const gko::size_type &) const override + void on_copy_completed(const gko::Executor* from, const gko::Executor* to, + const gko::uintptr&, const gko::uintptr&, + const gko::size_type&) const override { from->synchronize(); this->end_operation(to, "copy"); } - void on_operation_launched(const gko::Executor *exec, - const gko::Operation *op) const override + void on_operation_launched(const gko::Executor* exec, + const gko::Operation* op) const override { this->start_operation(exec, op->get_name()); } - void on_operation_completed(const gko::Executor *exec, - const gko::Operation *op) const override + void on_operation_completed(const gko::Executor* exec, + const gko::Operation* op) const override { this->end_operation(exec, op->get_name()); } - void write_data(std::ostream &ostream) + void write_data(std::ostream& ostream) { - for (const auto &entry : total) { + for (const auto& entry : total) { ostream << "\t" << entry.first.c_str() << ": " << std::chrono::duration_cast( entry.second) @@ -186,8 +186,8 @@ struct OperationLogger : gko::log::Logger { private: // Helper which synchronizes and starts the time before every operation. - void start_operation(const gko::Executor *exec, - const std::string &name) const + void start_operation(const gko::Executor* exec, + const std::string& name) const { nested.emplace_back(0); exec->synchronize(); @@ -196,7 +196,7 @@ struct OperationLogger : gko::log::Logger { // Helper to compute the end time and store the operation's time at its // end. Also time nested operations. 
- void end_operation(const gko::Executor *exec, const std::string &name) const + void end_operation(const gko::Executor* exec, const std::string& name) const { exec->synchronize(); const auto end = std::chrono::steady_clock::now(); @@ -220,25 +220,25 @@ struct OperationLogger : gko::log::Logger { // This logger tracks the persistently allocated data struct StorageLogger : gko::log::Logger { // Store amount of bytes allocated on every allocation - void on_allocation_completed(const gko::Executor *, - const gko::size_type &num_bytes, - const gko::uintptr &location) const override + void on_allocation_completed(const gko::Executor*, + const gko::size_type& num_bytes, + const gko::uintptr& location) const override { storage[location] = num_bytes; } // Reset the amount of bytes on every free - void on_free_completed(const gko::Executor *, - const gko::uintptr &location) const override + void on_free_completed(const gko::Executor*, + const gko::uintptr& location) const override { storage[location] = 0; } // Write the data after summing the total from all allocations - void write_data(std::ostream &ostream) + void write_data(std::ostream& ostream) { gko::size_type total{}; - for (const auto &e : storage) { + for (const auto& e : storage) { total += e.second; } ostream << "Storage: " << total << std::endl; @@ -259,10 +259,10 @@ struct ResidualLogger : gko::log::Logger { // Depending on the available information, store the norm or compute it from // the residual. If the true residual norm could not be computed, store the // value `-1.0`. 
- void on_iteration_complete(const gko::LinOp *, const gko::size_type &, - const gko::LinOp *residual, - const gko::LinOp *solution, - const gko::LinOp *residual_norm) const override + void on_iteration_complete(const gko::LinOp*, const gko::size_type&, + const gko::LinOp* residual, + const gko::LinOp* solution, + const gko::LinOp* residual_norm) const override { if (residual_norm) { rec_res_norms.push_back(utils::get_first_element( @@ -280,32 +280,32 @@ struct ResidualLogger : gko::log::Logger { } ResidualLogger(std::shared_ptr exec, - const gko::LinOp *matrix, const vec *b) + const gko::LinOp* matrix, const vec* b) : gko::log::Logger(exec, gko::log::Logger::iteration_complete_mask), matrix{matrix}, b{b} {} - void write_data(std::ostream &ostream) + void write_data(std::ostream& ostream) { ostream << "Recurrent Residual Norms: " << std::endl; ostream << "[" << std::endl; - for (const auto &entry : rec_res_norms) { + for (const auto& entry : rec_res_norms) { ostream << "\t" << entry << std::endl; } ostream << "];" << std::endl; ostream << "True Residual Norms: " << std::endl; ostream << "[" << std::endl; - for (const auto &entry : true_res_norms) { + for (const auto& entry : true_res_norms) { ostream << "\t" << entry << std::endl; } ostream << "];" << std::endl; } private: - const gko::LinOp *matrix; - const vec *b; + const gko::LinOp* matrix; + const vec* b; mutable std::vector> rec_res_norms; mutable std::vector> true_res_norms; }; @@ -318,7 +318,7 @@ namespace { // Print usage help -void print_usage(const char *filename) +void print_usage(const char* filename) { std::cerr << "Usage: " << filename << " [executor] [matrix file]" << std::endl; @@ -330,7 +330,7 @@ void print_usage(const char *filename) template -void print_vector(const gko::matrix::Dense *vec) +void print_vector(const gko::matrix::Dense* vec) { auto elements_to_print = std::min(gko::size_type(10), vec->get_size()[0]); std::cout << "[" << std::endl; @@ -344,7 +344,7 @@ void print_vector(const 
gko::matrix::Dense *vec) } // namespace -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Parametrize the benchmark here // Pick a value type diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp index 2d04a84a7d3..1d22051f700 100644 --- a/examples/poisson-solver/poisson-solver.cpp +++ b/examples/poisson-solver/poisson-solver.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Creates a stencil matrix in CSR format for the given number of discretization // points. template -void generate_stencil_matrix(gko::matrix::Csr *matrix) +void generate_stencil_matrix(gko::matrix::Csr* matrix) { const auto discretization_points = matrix->get_size()[0]; auto row_ptrs = matrix->get_row_ptrs(); @@ -65,7 +65,7 @@ void generate_stencil_matrix(gko::matrix::Csr *matrix) // Generates the RHS vector given `f` and the boundary conditions. template void generate_rhs(Closure f, ValueType u0, ValueType u1, - gko::matrix::Dense *rhs) + gko::matrix::Dense* rhs) { const auto discretization_points = rhs->get_size()[0]; auto values = rhs->get_values(); @@ -82,7 +82,7 @@ void generate_rhs(Closure f, ValueType u0, ValueType u1, // Prints the solution `u`. template void print_solution(ValueType u0, ValueType u1, - const gko::matrix::Dense *u) + const gko::matrix::Dense* u) { std::cout << u0 << '\n'; for (int i = 0; i < u->get_size()[0]; ++i) { @@ -96,7 +96,7 @@ void print_solution(ValueType u0, ValueType u1, // solution function `correct_u`. 
template gko::remove_complex calculate_error( - int discretization_points, const gko::matrix::Dense *u, + int discretization_points, const gko::matrix::Dense* u, Closure correct_u) { const ValueType h = 1.0 / static_cast(discretization_points + 1); @@ -111,7 +111,7 @@ gko::remove_complex calculate_error( } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp index ce01f0c0cc5..95e174698c8 100644 --- a/examples/preconditioned-solver/preconditioned-solver.cpp +++ b/examples/preconditioned-solver/preconditioned-solver.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; diff --git a/examples/preconditioner-export/preconditioner-export.cpp b/examples/preconditioner-export/preconditioner-export.cpp index 6d048e0e9d9..c3ff256f419 100644 --- a/examples/preconditioner-export/preconditioner-export.cpp +++ b/examples/preconditioner-export/preconditioner-export.cpp @@ -61,7 +61,7 @@ const std::map()>> }}}; -void output(const gko::WritableToMatrixData *mtx, std::string name) +void output(const gko::WritableToMatrixData* mtx, std::string name) { std::ofstream stream{name}; std::cerr << "Writing " << name << std::endl; @@ -75,7 +75,7 @@ auto try_generate(Function fun) -> decltype(fun()) decltype(fun()) result; try { result = fun(); - } catch (const gko::Error &err) { + } catch (const gko::Error& err) { std::cerr << "Error: " << err.what() << '\n'; std::exit(-1); } @@ -83,7 +83,7 @@ auto try_generate(Function fun) -> decltype(fun()) } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // print usage message if (argc < 2 || executors.find(argv[1]) == executors.end()) { diff --git 
a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index a7f5bbaca84..34d82a2794d 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -44,8 +44,8 @@ namespace { template -void print_vector(const std::string &name, - const gko::matrix::Dense *vec) +void print_vector(const std::string& name, + const gko::matrix::Dense* vec) { std::cout << name << " = [" << std::endl; for (int i = 0; i < vec->get_size()[0]; ++i) { @@ -58,7 +58,7 @@ void print_vector(const std::string &name, } // namespace -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Some shortcuts using ValueType = double; @@ -168,7 +168,7 @@ int main(int argc, char *argv[]) // Finally, get some data from `record_logger` and print the last memory // location copied - auto &last_copy = record_logger->get().copy_completed.back(); + auto& last_copy = record_logger->get().copy_completed.back(); std::cout << "Last memory copied was of size " << std::hex << std::get<0>(*last_copy).num_bytes << " FROM executor " << std::get<0>(*last_copy).exec << " pointer " diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp index bf8095fc373..4a9a09961b3 100644 --- a/examples/simple-solver/simple-solver.cpp +++ b/examples/simple-solver/simple-solver.cpp @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { // Use some shortcuts. In Ginkgo, vectors are seen as a gko::matrix::Dense // with one column/one row. 
The advantage of this concept is that using diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index a050be1a7fe..5fedd6ed7ce 100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -80,8 +80,8 @@ use Ginkgo, and the only part where Ginkgo is introduced is inside the // points. template void generate_stencil_matrix(IndexType discretization_points, - IndexType *row_ptrs, IndexType *col_idxs, - ValueType *values) + IndexType* row_ptrs, IndexType* col_idxs, + ValueType* values) { IndexType pos = 0; const ValueType coefs[] = {-1, 2, -1}; @@ -102,7 +102,7 @@ void generate_stencil_matrix(IndexType discretization_points, // Generates the RHS vector given `f` and the boundary conditions. template void generate_rhs(IndexType discretization_points, Closure f, ValueType u0, - ValueType u1, ValueType *rhs) + ValueType u1, ValueType* rhs) { const ValueType h = 1.0 / (discretization_points + 1); for (IndexType i = 0; i < discretization_points; ++i) { @@ -117,7 +117,7 @@ void generate_rhs(IndexType discretization_points, Closure f, ValueType u0, // Prints the solution `u`. template void print_solution(IndexType discretization_points, ValueType u0, ValueType u1, - const ValueType *u) + const ValueType* u) { std::cout << u0 << '\n'; for (IndexType i = 0; i < discretization_points; ++i) { @@ -131,7 +131,7 @@ void print_solution(IndexType discretization_points, ValueType u0, ValueType u1, // solution function `correct_u`. 
template gko::remove_complex calculate_error(IndexType discretization_points, - const ValueType *u, + const ValueType* u, Closure correct_u) { const ValueType h = 1.0 / (discretization_points + 1); @@ -145,10 +145,10 @@ gko::remove_complex calculate_error(IndexType discretization_points, } template -void solve_system(const std::string &executor_string, - IndexType discretization_points, IndexType *row_ptrs, - IndexType *col_idxs, ValueType *values, ValueType *rhs, - ValueType *u, gko::remove_complex reduction_factor) +void solve_system(const std::string& executor_string, + IndexType discretization_points, IndexType* row_ptrs, + IndexType* col_idxs, ValueType* values, ValueType* rhs, + ValueType* u, gko::remove_complex reduction_factor) { // Some shortcuts using vec = gko::matrix::Dense; @@ -157,7 +157,7 @@ void solve_system(const std::string &executor_string, using bj = gko::preconditioner::Jacobi; using val_array = gko::Array; using idx_array = gko::Array; - const auto &dp = discretization_points; + const auto& dp = discretization_points; // Figure out where to run the code std::map()>> @@ -232,7 +232,7 @@ void solve_system(const std::string &executor_string, } -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { using ValueType = double; using IndexType = int; diff --git a/hip/base/device_guard.hip.hpp b/hip/base/device_guard.hip.hpp index a0e02c78658..381bed97efe 100644 --- a/hip/base/device_guard.hip.hpp +++ b/hip/base/device_guard.hip.hpp @@ -63,13 +63,13 @@ class device_guard { GKO_ASSERT_NO_HIP_ERRORS(hipSetDevice(device_id)); } - device_guard(device_guard &other) = delete; + device_guard(device_guard& other) = delete; - device_guard &operator=(const device_guard &other) = delete; + device_guard& operator=(const device_guard& other) = delete; - device_guard(device_guard &&other) = delete; + device_guard(device_guard&& other) = delete; - device_guard const &operator=(device_guard &&other) = delete; + device_guard const& 
operator=(device_guard&& other) = delete; ~device_guard() noexcept(false) { diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index fa24c1eb929..6bfc0f217e5 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -69,12 +69,12 @@ std::shared_ptr HipExecutor::create( { return std::shared_ptr( new HipExecutor(device_id, std::move(master), device_reset, alloc_mode), - [device_id](HipExecutor *exec) { + [device_id](HipExecutor* exec) { auto device_reset = exec->get_device_reset(); std::lock_guard guard( hip_device_class::get_mutex(device_id)); delete exec; - auto &num_execs = hip_device_class::get_num_execs(device_id); + auto& num_execs = hip_device_class::get_num_execs(device_id); num_execs--; if (!num_execs && device_reset) { hip::device_guard g(device_id); @@ -84,7 +84,7 @@ std::shared_ptr HipExecutor::create( } -void HipExecutor::populate_exec_info(const MachineTopology *mach_topo) +void HipExecutor::populate_exec_info(const MachineTopology* mach_topo) { if (this->get_device_id() < this->get_num_devices() && this->get_device_id() >= 0) { @@ -104,8 +104,8 @@ void HipExecutor::populate_exec_info(const MachineTopology *mach_topo) } -void OmpExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void OmpExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { hip::device_guard g(dest->get_device_id()); @@ -115,7 +115,7 @@ void OmpExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, } -void HipExecutor::raw_free(void *ptr) const noexcept +void HipExecutor::raw_free(void* ptr) const noexcept { hip::device_guard g(this->get_device_id()); auto error_code = hipFree(ptr); @@ -133,9 +133,9 @@ void HipExecutor::raw_free(void *ptr) const noexcept } -void *HipExecutor::raw_alloc(size_type num_bytes) const +void* HipExecutor::raw_alloc(size_type num_bytes) const { - void *dev_ptr = nullptr; + 
void* dev_ptr = nullptr; hip::device_guard g(this->get_device_id()); int error_code = 0; if (this->alloc_mode_ == allocation_mode::device) { @@ -157,8 +157,8 @@ void *HipExecutor::raw_alloc(size_type num_bytes) const } -void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { hip::device_guard g(this->get_device_id()); @@ -168,8 +168,8 @@ void HipExecutor::raw_copy_to(const OmpExecutor *, size_type num_bytes, } -void HipExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { #if GINKGO_HIP_PLATFORM_NVCC == 1 if (num_bytes > 0) { @@ -184,15 +184,15 @@ void HipExecutor::raw_copy_to(const CudaExecutor *dest, size_type num_bytes, } -void HipExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { GKO_NOT_SUPPORTED(dest); } -void HipExecutor::raw_copy_to(const HipExecutor *dest, size_type num_bytes, - const void *src_ptr, void *dest_ptr) const +void HipExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { hip::device_guard g(this->get_device_id()); @@ -210,7 +210,7 @@ void HipExecutor::synchronize() const } -void HipExecutor::run(const Operation &op) const +void HipExecutor::run(const Operation& op) const { this->template log(this, &op); hip::device_guard g(this->get_device_id()); @@ -287,12 +287,12 @@ void HipExecutor::init_handles() const auto id = this->get_device_id(); hip::device_guard g(id); this->hipblas_handle_ = 
handle_manager( - kernels::hip::hipblas::init(), [id](hipblasContext *handle) { + kernels::hip::hipblas::init(), [id](hipblasContext* handle) { hip::device_guard g(id); kernels::hip::hipblas::destroy_hipblas_handle(handle); }); this->hipsparse_handle_ = handle_manager( - kernels::hip::hipsparse::init(), [id](hipsparseContext *handle) { + kernels::hip::hipsparse::init(), [id](hipsparseContext* handle) { hip::device_guard g(id); kernels::hip::hipsparse::destroy_hipsparse_handle(handle); }); diff --git a/hip/base/hipblas_bindings.hip.hpp b/hip/base/hipblas_bindings.hip.hpp index 3889df69ff1..1644ae7add0 100644 --- a/hip/base/hipblas_bindings.hip.hpp +++ b/hip/base/hipblas_bindings.hip.hpp @@ -73,7 +73,7 @@ namespace detail { template -inline int64 not_implemented(Args &&...) +inline int64 not_implemented(Args&&...) { return static_cast(HIPBLAS_STATUS_NOT_SUPPORTED); } @@ -101,9 +101,9 @@ struct is_supported> : std::true_type {}; #define GKO_BIND_HIPBLAS_GEMM(ValueType, HipblasName) \ inline void gemm(hipblasHandle_t handle, hipblasOperation_t transa, \ hipblasOperation_t transb, int m, int n, int k, \ - const ValueType *alpha, const ValueType *a, int lda, \ - const ValueType *b, int ldb, const ValueType *beta, \ - ValueType *c, int ldc) \ + const ValueType* alpha, const ValueType* a, int lda, \ + const ValueType* b, int ldb, const ValueType* beta, \ + ValueType* c, int ldc) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS(HipblasName( \ handle, transa, transb, m, n, k, as_hipblas_type(alpha), \ @@ -128,9 +128,9 @@ GKO_BIND_HIPBLAS_GEMM(ValueType, detail::not_implemented); #define GKO_BIND_HIPBLAS_GEAM(ValueType, HipblasName) \ inline void geam(hipblasHandle_t handle, hipblasOperation_t transa, \ hipblasOperation_t transb, int m, int n, \ - const ValueType *alpha, const ValueType *a, int lda, \ - const ValueType *beta, const ValueType *b, int ldb, \ - ValueType *c, int ldc) \ + const ValueType* alpha, const ValueType* a, int lda, \ + const ValueType* beta, const ValueType* b, int 
ldb, \ + ValueType* c, int ldc) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ HipblasName(handle, transa, transb, m, n, as_hipblas_type(alpha), \ @@ -151,8 +151,8 @@ GKO_BIND_HIPBLAS_GEAM(ValueType, detail::not_implemented); #define GKO_BIND_HIPBLAS_SCAL(ValueType, HipblasName) \ - inline void scal(hipblasHandle_t handle, int n, const ValueType *alpha, \ - ValueType *x, int incx) \ + inline void scal(hipblasHandle_t handle, int n, const ValueType* alpha, \ + ValueType* x, int incx) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS(HipblasName( \ handle, n, as_hipblas_type(alpha), as_hipblas_type(x), incx)); \ @@ -173,8 +173,8 @@ GKO_BIND_HIPBLAS_SCAL(ValueType, detail::not_implemented); #define GKO_BIND_HIPBLAS_AXPY(ValueType, HipblasName) \ - inline void axpy(hipblasHandle_t handle, int n, const ValueType *alpha, \ - const ValueType *x, int incx, ValueType *y, int incy) \ + inline void axpy(hipblasHandle_t handle, int n, const ValueType* alpha, \ + const ValueType* x, int incx, ValueType* y, int incy) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ HipblasName(handle, n, as_hipblas_type(alpha), as_hipblas_type(x), \ @@ -196,8 +196,8 @@ GKO_BIND_HIPBLAS_AXPY(ValueType, detail::not_implemented); #define GKO_BIND_HIPBLAS_DOT(ValueType, HipblasName) \ - inline void dot(hipblasHandle_t handle, int n, const ValueType *x, \ - int incx, const ValueType *y, int incy, ValueType *result) \ + inline void dot(hipblasHandle_t handle, int n, const ValueType* x, \ + int incx, const ValueType* y, int incy, ValueType* result) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ HipblasName(handle, n, as_hipblas_type(x), incx, \ @@ -219,9 +219,9 @@ GKO_BIND_HIPBLAS_DOT(ValueType, detail::not_implemented); #define GKO_BIND_HIPBLAS_CONJ_DOT(ValueType, HipblasName) \ - inline void conj_dot(hipblasHandle_t handle, int n, const ValueType *x, \ - int incx, const ValueType *y, int incy, \ - ValueType *result) \ + inline void conj_dot(hipblasHandle_t handle, int n, const ValueType* x, \ + int incx, const ValueType* y, int incy, \ + 
ValueType* result) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS( \ HipblasName(handle, n, as_hipblas_type(x), incx, \ @@ -243,8 +243,8 @@ GKO_BIND_HIPBLAS_CONJ_DOT(ValueType, detail::not_implemented); #define GKO_BIND_HIPBLAS_NORM2(ValueType, HipblasName) \ - inline void norm2(hipblasHandle_t handle, int n, const ValueType *x, \ - int incx, remove_complex *result) \ + inline void norm2(hipblasHandle_t handle, int n, const ValueType* x, \ + int incx, remove_complex* result) \ { \ GKO_ASSERT_NO_HIPBLAS_ERRORS(HipblasName( \ handle, n, as_hipblas_type(x), incx, as_hipblas_type(result))); \ @@ -264,17 +264,17 @@ GKO_BIND_HIPBLAS_NORM2(ValueType, detail::not_implemented); #undef GKO_BIND_HIPBLAS_NORM2 -inline hipblasContext *init() +inline hipblasContext* init() { hipblasHandle_t handle; GKO_ASSERT_NO_HIPBLAS_ERRORS(hipblasCreate(&handle)); GKO_ASSERT_NO_HIPBLAS_ERRORS( hipblasSetPointerMode(handle, HIPBLAS_POINTER_MODE_DEVICE)); - return reinterpret_cast(handle); + return reinterpret_cast(handle); } -inline void destroy_hipblas_handle(hipblasContext *handle) +inline void destroy_hipblas_handle(hipblasContext* handle) { GKO_ASSERT_NO_HIPBLAS_ERRORS( hipblasDestroy(reinterpret_cast(handle))); diff --git a/hip/base/hiprand_bindings.hip.hpp b/hip/base/hiprand_bindings.hip.hpp index f2f54313bad..60bdaf6b899 100644 --- a/hip/base/hiprand_bindings.hip.hpp +++ b/hip/base/hiprand_bindings.hip.hpp @@ -83,12 +83,12 @@ inline hiprandGenerator_t rand_generator(int64 seed, #define GKO_BIND_HIPRAND_RANDOM_VECTOR(ValueType, HiprandName) \ inline void rand_vector( \ - hiprandGenerator_t &gen, int n, remove_complex mean, \ - remove_complex stddev, ValueType *values) \ + hiprandGenerator_t& gen, int n, remove_complex mean, \ + remove_complex stddev, ValueType* values) \ { \ n = is_complex() ? 
2 * n : n; \ GKO_ASSERT_NO_HIPRAND_ERRORS(HiprandName( \ - gen, reinterpret_cast *>(values), n, \ + gen, reinterpret_cast*>(values), n, \ mean, stddev)); \ } \ static_assert(true, \ diff --git a/hip/base/hipsparse_bindings.hip.hpp b/hip/base/hipsparse_bindings.hip.hpp index 8a275631fc6..5dce48b1acc 100644 --- a/hip/base/hipsparse_bindings.hip.hpp +++ b/hip/base/hipsparse_bindings.hip.hpp @@ -83,11 +83,11 @@ struct is_supported : std::true_type {}; #define GKO_BIND_HIPSPARSE32_SPMV(ValueType, HipsparseName) \ inline void spmv(hipsparseHandle_t handle, hipsparseOperation_t transA, \ - int32 m, int32 n, int32 nnz, const ValueType *alpha, \ + int32 m, int32 n, int32 nnz, const ValueType* alpha, \ const hipsparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *x, \ - const ValueType *beta, ValueType *y) \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* x, \ + const ValueType* beta, ValueType* y) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, transA, m, n, nnz, as_hiplibs_type(alpha), descrA, \ @@ -100,11 +100,11 @@ struct is_supported : std::true_type {}; #define GKO_BIND_HIPSPARSE64_SPMV(ValueType, HipsparseName) \ inline void spmv(hipsparseHandle_t handle, hipsparseOperation_t transA, \ - int64 m, int64 n, int64 nnz, const ValueType *alpha, \ + int64 m, int64 n, int64 nnz, const ValueType* alpha, \ const hipsparseMatDescr_t descrA, \ - const ValueType *csrValA, const int64 *csrRowPtrA, \ - const int64 *csrColIndA, const ValueType *x, \ - const ValueType *beta, ValueType *y) GKO_NOT_IMPLEMENTED; \ + const ValueType* csrValA, const int64* csrRowPtrA, \ + const int64* csrColIndA, const ValueType* x, \ + const ValueType* beta, ValueType* y) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -126,10 +126,10 @@ GKO_BIND_HIPSPARSE64_SPMV(ValueType, 
detail::not_implemented); #define GKO_BIND_HIPSPARSE32_SPMM(ValueType, HipsparseName) \ inline void spmm(hipsparseHandle_t handle, hipsparseOperation_t transA, \ int32 m, int32 n, int32 k, int32 nnz, \ - const ValueType *alpha, const hipsparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const ValueType *B, int32 ldb, \ - const ValueType *beta, ValueType *C, int32 ldc) \ + const ValueType* alpha, const hipsparseMatDescr_t descrA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const ValueType* B, int32 ldb, \ + const ValueType* beta, ValueType* C, int32 ldc) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, transA, m, n, k, nnz, as_hiplibs_type(alpha), descrA, \ @@ -144,10 +144,10 @@ GKO_BIND_HIPSPARSE64_SPMV(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE64_SPMM(ValueType, HipsparseName) \ inline void spmm(hipsparseHandle_t handle, hipsparseOperation_t transA, \ int64 m, int64 n, int64 k, int64 nnz, \ - const ValueType *alpha, const hipsparseMatDescr_t descrA, \ - const ValueType *csrValA, const int64 *csrRowPtrA, \ - const int64 *csrColIndA, const ValueType *B, int64 ldb, \ - const ValueType *beta, ValueType *C, int64 ldc) \ + const ValueType* alpha, const hipsparseMatDescr_t descrA, \ + const ValueType* csrValA, const int64* csrRowPtrA, \ + const int64* csrColIndA, const ValueType* B, int64 ldb, \ + const ValueType* beta, ValueType* C, int64 ldc) \ GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -169,9 +169,9 @@ GKO_BIND_HIPSPARSE64_SPMM(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE32_SPMV(ValueType, HipsparseName) \ inline void spmv(hipsparseHandle_t handle, hipsparseOperation_t transA, \ - const ValueType *alpha, const hipsparseMatDescr_t descrA, \ - const hipsparseHybMat_t hybA, const ValueType *x, \ - const ValueType *beta, ValueType *y) \ + const ValueType* 
alpha, const hipsparseMatDescr_t descrA, \ + const hipsparseHybMat_t hybA, const ValueType* x, \ + const ValueType* beta, ValueType* y) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, transA, as_hiplibs_type(alpha), descrA, hybA, \ @@ -193,24 +193,24 @@ GKO_BIND_HIPSPARSE32_SPMV(ValueType, detail::not_implemented); template void spgemm_buffer_size( hipsparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const ValueType *alpha, const hipsparseMatDescr_t descrA, IndexType nnzA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, + const ValueType* alpha, const hipsparseMatDescr_t descrA, IndexType nnzA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, const hipsparseMatDescr_t descrB, IndexType nnzB, - const IndexType *csrRowPtrB, const IndexType *csrColIndB, - const ValueType *beta, const hipsparseMatDescr_t descrD, IndexType nnzD, - const IndexType *csrRowPtrD, const IndexType *csrColIndD, - csrgemm2Info_t info, size_type &result) GKO_NOT_IMPLEMENTED; + const IndexType* csrRowPtrB, const IndexType* csrColIndB, + const ValueType* beta, const hipsparseMatDescr_t descrD, IndexType nnzD, + const IndexType* csrRowPtrD, const IndexType* csrColIndD, + csrgemm2Info_t info, size_type& result) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_SPGEMM_BUFFER_SIZE(ValueType, HipsparseName) \ template <> \ inline void spgemm_buffer_size( \ hipsparseHandle_t handle, int32 m, int32 n, int32 k, \ - const ValueType *alpha, const hipsparseMatDescr_t descrA, int32 nnzA, \ - const int32 *csrRowPtrA, const int32 *csrColIndA, \ - const hipsparseMatDescr_t descrB, int32 nnzB, const int32 *csrRowPtrB, \ - const int32 *csrColIndB, const ValueType *beta, \ - const hipsparseMatDescr_t descrD, int32 nnzD, const int32 *csrRowPtrD, \ - const int32 *csrColIndD, csrgemm2Info_t info, size_type &result) \ + const ValueType* alpha, const hipsparseMatDescr_t descrA, int32 nnzA, \ + const int32* csrRowPtrA, const int32* csrColIndA, \ + const hipsparseMatDescr_t 
descrB, int32 nnzB, const int32* csrRowPtrB, \ + const int32* csrColIndB, const ValueType* beta, \ + const hipsparseMatDescr_t descrD, int32 nnzD, const int32* csrRowPtrD, \ + const int32* csrColIndD, csrgemm2Info_t info, size_type& result) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, m, n, k, as_hiplibs_type(alpha), descrA, nnzA, csrRowPtrA, \ @@ -240,24 +240,24 @@ GKO_BIND_HIPSPARSE_SPGEMM_BUFFER_SIZE(std::complex, template void spgemm_nnz(hipsparseHandle_t handle, IndexType m, IndexType n, IndexType k, const hipsparseMatDescr_t descrA, IndexType nnzA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, const hipsparseMatDescr_t descrB, IndexType nnzB, - const IndexType *csrRowPtrB, const IndexType *csrColIndB, + const IndexType* csrRowPtrB, const IndexType* csrColIndB, const hipsparseMatDescr_t descrD, IndexType nnzD, - const IndexType *csrRowPtrD, const IndexType *csrColIndD, - const hipsparseMatDescr_t descrC, IndexType *csrRowPtrC, - IndexType *nnzC, csrgemm2Info_t info, - void *buffer) GKO_NOT_IMPLEMENTED; + const IndexType* csrRowPtrD, const IndexType* csrColIndD, + const hipsparseMatDescr_t descrC, IndexType* csrRowPtrC, + IndexType* nnzC, csrgemm2Info_t info, + void* buffer) GKO_NOT_IMPLEMENTED; template <> inline void spgemm_nnz( hipsparseHandle_t handle, int32 m, int32 n, int32 k, - const hipsparseMatDescr_t descrA, int32 nnzA, const int32 *csrRowPtrA, - const int32 *csrColIndA, const hipsparseMatDescr_t descrB, int32 nnzB, - const int32 *csrRowPtrB, const int32 *csrColIndB, - const hipsparseMatDescr_t descrD, int32 nnzD, const int32 *csrRowPtrD, - const int32 *csrColIndD, const hipsparseMatDescr_t descrC, - int32 *csrRowPtrC, int32 *nnzC, csrgemm2Info_t info, void *buffer) + const hipsparseMatDescr_t descrA, int32 nnzA, const int32* csrRowPtrA, + const int32* csrColIndA, const hipsparseMatDescr_t descrB, int32 nnzB, + const int32* csrRowPtrB, const int32* csrColIndB, + 
const hipsparseMatDescr_t descrD, int32 nnzD, const int32* csrRowPtrD, + const int32* csrColIndD, const hipsparseMatDescr_t descrC, + int32* csrRowPtrC, int32* nnzC, csrgemm2Info_t info, void* buffer) { GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseXcsrgemm2Nnz( handle, m, n, k, descrA, nnzA, csrRowPtrA, csrColIndA, descrB, nnzB, @@ -268,33 +268,33 @@ inline void spgemm_nnz( template void spgemm(hipsparseHandle_t handle, IndexType m, IndexType n, IndexType k, - const ValueType *alpha, const hipsparseMatDescr_t descrA, - IndexType nnzA, const ValueType *csrValA, - const IndexType *csrRowPtrA, const IndexType *csrColIndA, + const ValueType* alpha, const hipsparseMatDescr_t descrA, + IndexType nnzA, const ValueType* csrValA, + const IndexType* csrRowPtrA, const IndexType* csrColIndA, const hipsparseMatDescr_t descrB, IndexType nnzB, - const ValueType *csrValB, const IndexType *csrRowPtrB, - const IndexType *csrColIndB, const ValueType *beta, + const ValueType* csrValB, const IndexType* csrRowPtrB, + const IndexType* csrColIndB, const ValueType* beta, const hipsparseMatDescr_t descrD, IndexType nnzD, - const ValueType *csrValD, const IndexType *csrRowPtrD, - const IndexType *csrColIndD, const hipsparseMatDescr_t descrC, - ValueType *csrValC, const IndexType *csrRowPtrC, - IndexType *csrColIndC, csrgemm2Info_t info, - void *buffer) GKO_NOT_IMPLEMENTED; + const ValueType* csrValD, const IndexType* csrRowPtrD, + const IndexType* csrColIndD, const hipsparseMatDescr_t descrC, + ValueType* csrValC, const IndexType* csrRowPtrC, + IndexType* csrColIndC, csrgemm2Info_t info, + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_SPGEMM(ValueType, HipsparseName) \ template <> \ inline void spgemm( \ hipsparseHandle_t handle, int32 m, int32 n, int32 k, \ - const ValueType *alpha, const hipsparseMatDescr_t descrA, int32 nnzA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, const hipsparseMatDescr_t descrB, int32 nnzB, \ - const ValueType 
*csrValB, const int32 *csrRowPtrB, \ - const int32 *csrColIndB, const ValueType *beta, \ + const ValueType* alpha, const hipsparseMatDescr_t descrA, int32 nnzA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, const hipsparseMatDescr_t descrB, int32 nnzB, \ + const ValueType* csrValB, const int32* csrRowPtrB, \ + const int32* csrColIndB, const ValueType* beta, \ const hipsparseMatDescr_t descrD, int32 nnzD, \ - const ValueType *csrValD, const int32 *csrRowPtrD, \ - const int32 *csrColIndD, const hipsparseMatDescr_t descrC, \ - ValueType *csrValC, const int32 *csrRowPtrC, int32 *csrColIndC, \ - csrgemm2Info_t info, void *buffer) \ + const ValueType* csrValD, const int32* csrRowPtrD, \ + const int32* csrColIndD, const hipsparseMatDescr_t descrC, \ + ValueType* csrValC, const int32* csrRowPtrC, int32* csrColIndC, \ + csrgemm2Info_t info, void* buffer) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, m, n, k, as_hiplibs_type(alpha), descrA, nnzA, \ @@ -324,8 +324,8 @@ GKO_BIND_HIPSPARSE_SPGEMM(std::complex, hipsparseZcsrgemm2); #define GKO_BIND_HIPSPARSE32_CSR2HYB(ValueType, HipsparseName) \ inline void csr2hyb(hipsparseHandle_t handle, int32 m, int32 n, \ const hipsparseMatDescr_t descrA, \ - const ValueType *csrValA, const int32 *csrRowPtrA, \ - const int32 *csrColIndA, hipsparseHybMat_t hybA, \ + const ValueType* csrValA, const int32* csrRowPtrA, \ + const int32* csrColIndA, hipsparseHybMat_t hybA, \ int32 userEllWidth, \ hipsparseHybPartition_t partitionType) \ { \ @@ -340,8 +340,8 @@ GKO_BIND_HIPSPARSE_SPGEMM(std::complex, hipsparseZcsrgemm2); #define GKO_BIND_HIPSPARSE64_CSR2HYB(ValueType, HipsparseName) \ inline void csr2hyb( \ hipsparseHandle_t handle, int64 m, int64 n, \ - const hipsparseMatDescr_t descrA, const ValueType *csrValA, \ - const int64 *csrRowPtrA, const int64 *csrColIndA, \ + const hipsparseMatDescr_t descrA, const ValueType* csrValA, \ + const int64* csrRowPtrA, const int64* csrColIndA, \ 
hipsparseHybMat_t hybA, int64 userEllWidth, \ hipsparseHybPartition_t partitionType) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ @@ -364,10 +364,10 @@ GKO_BIND_HIPSPARSE64_CSR2HYB(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE_TRANSPOSE32(ValueType, HipsparseName) \ inline void transpose(hipsparseHandle_t handle, size_type m, size_type n, \ - size_type nnz, const ValueType *OrigValA, \ - const int32 *OrigRowPtrA, const int32 *OrigColIndA, \ - ValueType *TransValA, int32 *TransRowPtrA, \ - int32 *TransColIndA, hipsparseAction_t copyValues, \ + size_type nnz, const ValueType* OrigValA, \ + const int32* OrigRowPtrA, const int32* OrigColIndA, \ + ValueType* TransValA, int32* TransRowPtrA, \ + int32* TransColIndA, hipsparseAction_t copyValues, \ hipsparseIndexBase_t idxBase) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ @@ -381,10 +381,10 @@ GKO_BIND_HIPSPARSE64_CSR2HYB(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE_TRANSPOSE64(ValueType, HipsparseName) \ inline void transpose(hipsparseHandle_t handle, size_type m, size_type n, \ - size_type nnz, const ValueType *OrigValA, \ - const int64 *OrigRowPtrA, const int64 *OrigColIndA, \ - ValueType *TransValA, int64 *TransRowPtrA, \ - int64 *TransColIndA, hipsparseAction_t copyValues, \ + size_type nnz, const ValueType* OrigValA, \ + const int64* OrigRowPtrA, const int64* OrigColIndA, \ + ValueType* TransValA, int64* TransRowPtrA, \ + int64* TransColIndA, hipsparseAction_t copyValues, \ hipsparseIndexBase_t idxBase) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -404,9 +404,9 @@ GKO_BIND_HIPSPARSE_TRANSPOSE64(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE_CONJ_TRANSPOSE32(ValueType, HipsparseName) \ inline void conj_transpose( \ hipsparseHandle_t handle, size_type m, size_type n, size_type nnz, \ - const ValueType *OrigValA, const int32 *OrigRowPtrA, \ - const int32 *OrigColIndA, ValueType *TransValA, int32 
*TransRowPtrA, \ - int32 *TransColIndA, hipsparseAction_t copyValues, \ + const ValueType* OrigValA, const int32* OrigRowPtrA, \ + const int32* OrigColIndA, ValueType* TransValA, int32* TransRowPtrA, \ + int32* TransColIndA, hipsparseAction_t copyValues, \ hipsparseIndexBase_t idxBase) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -415,9 +415,9 @@ GKO_BIND_HIPSPARSE_TRANSPOSE64(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE_CONJ_TRANSPOSE64(ValueType, HipsparseName) \ inline void conj_transpose( \ hipsparseHandle_t handle, size_type m, size_type n, size_type nnz, \ - const ValueType *OrigValA, const int64 *OrigRowPtrA, \ - const int64 *OrigColIndA, ValueType *TransValA, int64 *TransRowPtrA, \ - int64 *TransColIndA, hipsparseAction_t copyValues, \ + const ValueType* OrigValA, const int64* OrigRowPtrA, \ + const int64* OrigColIndA, ValueType* TransValA, int64* TransRowPtrA, \ + int64* TransColIndA, hipsparseAction_t copyValues, \ hipsparseIndexBase_t idxBase) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ @@ -439,13 +439,13 @@ GKO_BIND_HIPSPARSE_CONJ_TRANSPOSE64(ValueType, detail::not_implemented); inline void csrsv2_buffer_size( \ hipsparseHandle_t handle, hipsparseOperation_t trans, \ const size_type m, size_type nnz, const hipsparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, csrsv2Info_t factor_info, \ - int *factor_work_size) \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, csrsv2Info_t factor_info, \ + int* factor_work_size) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, trans, m, nnz, descr, \ - as_hiplibs_type(const_cast(csrVal)), csrRowPtr, \ + as_hiplibs_type(const_cast(csrVal)), csrRowPtr, \ csrColInd, factor_info, factor_work_size)); \ } \ static_assert(true, \ @@ -456,9 +456,9 @@ 
GKO_BIND_HIPSPARSE_CONJ_TRANSPOSE64(ValueType, detail::not_implemented); inline void csrsv2_buffer_size( \ hipsparseHandle_t handle, hipsparseOperation_t trans, size_type m, \ size_type nnz, const hipsparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, csrsv2Info_t factor_info, \ - int *factor_work_size) GKO_NOT_IMPLEMENTED; \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, csrsv2Info_t factor_info, \ + int* factor_work_size) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the " \ "false positive extra " \ @@ -479,9 +479,9 @@ GKO_BIND_HIPSPARSE64_CSRSV2_BUFFERSIZE(ValueType, detail::not_implemented); inline void csrsv2_analysis( \ hipsparseHandle_t handle, hipsparseOperation_t trans, size_type m, \ size_type nnz, const hipsparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, csrsv2Info_t factor_info, \ - hipsparseSolvePolicy_t policy, void *factor_work_vec) \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, csrsv2Info_t factor_info, \ + hipsparseSolvePolicy_t policy, void* factor_work_vec) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, trans, m, nnz, descr, as_hiplibs_type(csrVal), csrRowPtr, \ @@ -495,9 +495,9 @@ GKO_BIND_HIPSPARSE64_CSRSV2_BUFFERSIZE(ValueType, detail::not_implemented); inline void csrsv2_analysis( \ hipsparseHandle_t handle, hipsparseOperation_t trans, size_type m, \ size_type nnz, const hipsparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, csrsv2Info_t factor_info, \ - hipsparseSolvePolicy_t policy, void *factor_work_vec) \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, csrsv2Info_t factor_info, \ + hipsparseSolvePolicy_t policy, void* factor_work_vec) \ GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the " \ @@ -518,11 +518,11 
@@ GKO_BIND_HIPSPARSE64_CSRSV2_ANALYSIS(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE32_CSRSV2_SOLVE(ValueType, HipsparseName) \ inline void csrsv2_solve( \ hipsparseHandle_t handle, hipsparseOperation_t trans, size_type m, \ - size_type nnz, const ValueType *one, const hipsparseMatDescr_t descr, \ - const ValueType *csrVal, const int32 *csrRowPtr, \ - const int32 *csrColInd, csrsv2Info_t factor_info, \ - const ValueType *rhs, ValueType *sol, hipsparseSolvePolicy_t policy, \ - void *factor_work_vec) \ + size_type nnz, const ValueType* one, const hipsparseMatDescr_t descr, \ + const ValueType* csrVal, const int32* csrRowPtr, \ + const int32* csrColInd, csrsv2Info_t factor_info, \ + const ValueType* rhs, ValueType* sol, hipsparseSolvePolicy_t policy, \ + void* factor_work_vec) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, trans, m, nnz, as_hiplibs_type(one), descr, \ @@ -537,11 +537,11 @@ GKO_BIND_HIPSPARSE64_CSRSV2_ANALYSIS(ValueType, detail::not_implemented); #define GKO_BIND_HIPSPARSE64_CSRSV2_SOLVE(ValueType, HipsparseName) \ inline void csrsv2_solve( \ hipsparseHandle_t handle, hipsparseOperation_t trans, size_type m, \ - size_type nnz, const ValueType *one, const hipsparseMatDescr_t descr, \ - const ValueType *csrVal, const int64 *csrRowPtr, \ - const int64 *csrColInd, csrsv2Info_t factor_info, \ - const ValueType *rhs, ValueType *sol, hipsparseSolvePolicy_t policy, \ - void *factor_work_vec) GKO_NOT_IMPLEMENTED; \ + size_type nnz, const ValueType* one, const hipsparseMatDescr_t descr, \ + const ValueType* csrVal, const int64* csrRowPtr, \ + const int64* csrColInd, csrsv2Info_t factor_info, \ + const ValueType* rhs, ValueType* sol, hipsparseSolvePolicy_t policy, \ + void* factor_work_vec) GKO_NOT_IMPLEMENTED; \ static_assert(true, \ "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -558,17 +558,17 @@ GKO_BIND_HIPSPARSE64_CSRSV2_SOLVE(ValueType, detail::not_implemented); #undef 
GKO_BIND_HIPSPARSE64_CSRSV2_SOLVE -inline hipsparseContext *init() +inline hipsparseContext* init() { hipsparseHandle_t handle{}; GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseCreate(&handle)); GKO_ASSERT_NO_HIPSPARSE_ERRORS( hipsparseSetPointerMode(handle, HIPSPARSE_POINTER_MODE_DEVICE)); - return reinterpret_cast(handle); + return reinterpret_cast(handle); } -inline void destroy_hipsparse_handle(hipsparseContext *handle) +inline void destroy_hipsparse_handle(hipsparseContext* handle) { GKO_ASSERT_NO_HIPSPARSE_ERRORS( hipsparseDestroy(reinterpret_cast(handle))); @@ -633,11 +633,11 @@ inline void destroy_ic0_info(csric02Info_t info) template void create_identity_permutation(hipsparseHandle_t handle, IndexType size, - IndexType *permutation) GKO_NOT_IMPLEMENTED; + IndexType* permutation) GKO_NOT_IMPLEMENTED; template <> inline void create_identity_permutation(hipsparseHandle_t handle, - int32 size, int32 *permutation) + int32 size, int32* permutation) { GKO_ASSERT_NO_HIPSPARSE_ERRORS( hipsparseCreateIdentityPermutation(handle, size, permutation)); @@ -646,16 +646,16 @@ inline void create_identity_permutation(hipsparseHandle_t handle, template void csrsort_buffer_size(hipsparseHandle_t handle, IndexType m, IndexType n, - IndexType nnz, const IndexType *row_ptrs, - const IndexType *col_idxs, - size_type &buffer_size) GKO_NOT_IMPLEMENTED; + IndexType nnz, const IndexType* row_ptrs, + const IndexType* col_idxs, + size_type& buffer_size) GKO_NOT_IMPLEMENTED; template <> inline void csrsort_buffer_size(hipsparseHandle_t handle, int32 m, int32 n, int32 nnz, - const int32 *row_ptrs, - const int32 *col_idxs, - size_type &buffer_size) + const int32* row_ptrs, + const int32* col_idxs, + size_type& buffer_size) { GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseXcsrsort_bufferSizeExt( handle, m, n, nnz, row_ptrs, col_idxs, &buffer_size)); @@ -664,15 +664,15 @@ inline void csrsort_buffer_size(hipsparseHandle_t handle, int32 m, template void csrsort(hipsparseHandle_t handle, IndexType m, 
IndexType n, IndexType nnz, - const hipsparseMatDescr_t descr, const IndexType *row_ptrs, - IndexType *col_idxs, IndexType *permutation, - void *buffer) GKO_NOT_IMPLEMENTED; + const hipsparseMatDescr_t descr, const IndexType* row_ptrs, + IndexType* col_idxs, IndexType* permutation, + void* buffer) GKO_NOT_IMPLEMENTED; template <> inline void csrsort(hipsparseHandle_t handle, int32 m, int32 n, int32 nnz, const hipsparseMatDescr_t descr, - const int32 *row_ptrs, int32 *col_idxs, - int32 *permutation, void *buffer) + const int32* row_ptrs, int32* col_idxs, + int32* permutation, void* buffer) { GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseXcsrsort( handle, m, n, nnz, descr, row_ptrs, col_idxs, permutation, buffer)); @@ -680,14 +680,14 @@ inline void csrsort(hipsparseHandle_t handle, int32 m, int32 n, template -void gather(hipsparseHandle_t handle, IndexType nnz, const ValueType *in, - ValueType *out, const IndexType *permutation) GKO_NOT_IMPLEMENTED; +void gather(hipsparseHandle_t handle, IndexType nnz, const ValueType* in, + ValueType* out, const IndexType* permutation) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_GATHER(ValueType, HipsparseName) \ template <> \ inline void gather(hipsparseHandle_t handle, int32 nnz, \ - const ValueType *in, ValueType *out, \ - const int32 *permutation) \ + const ValueType* in, ValueType* out, \ + const int32* permutation) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS(HipsparseName( \ handle, nnz, as_hiplibs_type(in), as_hiplibs_type(out), \ @@ -711,23 +711,23 @@ GKO_BIND_HIPSPARSE_GATHER(std::complex, hipsparseZgthr); template void ilu0_buffer_size(hipsparseHandle_t handle, IndexType m, IndexType nnz, - const hipsparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const hipsparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csrilu02Info_t info, - size_type &buffer_size) GKO_NOT_IMPLEMENTED; + size_type& buffer_size) GKO_NOT_IMPLEMENTED; 
#define GKO_BIND_HIPSPARSE_ILU0_BUFFER_SIZE(ValueType, HipsparseName) \ template <> \ inline void ilu0_buffer_size( \ hipsparseHandle_t handle, int32 m, int32 nnz, \ - const hipsparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csrilu02Info_t info, \ - size_type &buffer_size) \ + const hipsparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csrilu02Info_t info, \ + size_type& buffer_size) \ { \ int tmp_buffer_size{}; \ GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, m, nnz, descr, \ - as_hiplibs_type(const_cast(vals)), \ + as_hiplibs_type(const_cast(vals)), \ row_ptrs, col_idxs, info, &tmp_buffer_size)); \ buffer_size = tmp_buffer_size; \ } \ @@ -751,18 +751,18 @@ GKO_BIND_HIPSPARSE_ILU0_BUFFER_SIZE(std::complex, template void ilu0_analysis(hipsparseHandle_t handle, IndexType m, IndexType nnz, - const hipsparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const hipsparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csrilu02Info_t info, hipsparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_ILU0_ANALYSIS(ValueType, HipsparseName) \ template <> \ inline void ilu0_analysis( \ hipsparseHandle_t handle, int32 m, int32 nnz, \ - const hipsparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csrilu02Info_t info, \ - hipsparseSolvePolicy_t policy, void *buffer) \ + const hipsparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csrilu02Info_t info, \ + hipsparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, m, nnz, descr, as_hiplibs_type(vals), \ @@ -788,18 +788,18 @@ GKO_BIND_HIPSPARSE_ILU0_ANALYSIS(std::complex, template void ilu0(hipsparseHandle_t handle, 
IndexType m, IndexType nnz, - const hipsparseMatDescr_t descr, ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const hipsparseMatDescr_t descr, ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csrilu02Info_t info, hipsparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_ILU0(ValueType, HipsparseName) \ template <> \ inline void ilu0( \ hipsparseHandle_t handle, int32 m, int32 nnz, \ - const hipsparseMatDescr_t descr, ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csrilu02Info_t info, \ - hipsparseSolvePolicy_t policy, void *buffer) \ + const hipsparseMatDescr_t descr, ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csrilu02Info_t info, \ + hipsparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, m, nnz, descr, as_hiplibs_type(vals), \ @@ -823,23 +823,23 @@ GKO_BIND_HIPSPARSE_ILU0(std::complex, hipsparseZcsrilu02); template void ic0_buffer_size(hipsparseHandle_t handle, IndexType m, IndexType nnz, - const hipsparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const hipsparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csric02Info_t info, - size_type &buffer_size) GKO_NOT_IMPLEMENTED; + size_type& buffer_size) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(ValueType, HipsparseName) \ template <> \ inline void ic0_buffer_size( \ hipsparseHandle_t handle, int32 m, int32 nnz, \ - const hipsparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ - size_type &buffer_size) \ + const hipsparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csric02Info_t info, \ + size_type& buffer_size) \ { \ int tmp_buffer_size{}; \ 
GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, m, nnz, descr, \ - as_hiplibs_type(const_cast(vals)), \ + as_hiplibs_type(const_cast(vals)), \ row_ptrs, col_idxs, info, &tmp_buffer_size)); \ buffer_size = tmp_buffer_size; \ } \ @@ -863,18 +863,18 @@ GKO_BIND_HIPSPARSE_IC0_BUFFER_SIZE(std::complex, template void ic0_analysis(hipsparseHandle_t handle, IndexType m, IndexType nnz, - const hipsparseMatDescr_t descr, const ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const hipsparseMatDescr_t descr, const ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csric02Info_t info, hipsparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_IC0_ANALYSIS(ValueType, HipsparseName) \ template <> \ inline void ic0_analysis( \ hipsparseHandle_t handle, int32 m, int32 nnz, \ - const hipsparseMatDescr_t descr, const ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ - hipsparseSolvePolicy_t policy, void *buffer) \ + const hipsparseMatDescr_t descr, const ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csric02Info_t info, \ + hipsparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, m, nnz, descr, as_hiplibs_type(vals), \ @@ -900,18 +900,18 @@ GKO_BIND_HIPSPARSE_IC0_ANALYSIS(std::complex, template void ic0(hipsparseHandle_t handle, IndexType m, IndexType nnz, - const hipsparseMatDescr_t descr, ValueType *vals, - const IndexType *row_ptrs, const IndexType *col_idxs, + const hipsparseMatDescr_t descr, ValueType* vals, + const IndexType* row_ptrs, const IndexType* col_idxs, csric02Info_t info, hipsparseSolvePolicy_t policy, - void *buffer) GKO_NOT_IMPLEMENTED; + void* buffer) GKO_NOT_IMPLEMENTED; #define GKO_BIND_HIPSPARSE_IC0(ValueType, HipsparseName) \ template <> \ inline void ic0( \ hipsparseHandle_t handle, int32 m, int32 nnz, \ - const 
hipsparseMatDescr_t descr, ValueType *vals, \ - const int32 *row_ptrs, const int32 *col_idxs, csric02Info_t info, \ - hipsparseSolvePolicy_t policy, void *buffer) \ + const hipsparseMatDescr_t descr, ValueType* vals, \ + const int32* row_ptrs, const int32* col_idxs, csric02Info_t info, \ + hipsparseSolvePolicy_t policy, void* buffer) \ { \ GKO_ASSERT_NO_HIPSPARSE_ERRORS( \ HipsparseName(handle, m, nnz, descr, as_hiplibs_type(vals), \ diff --git a/hip/base/kernel_launch.hip.hpp b/hip/base/kernel_launch.hip.hpp index 5e18cb9c645..8967ee5597d 100644 --- a/hip/base/kernel_launch.hip.hpp +++ b/hip/base/kernel_launch.hip.hpp @@ -80,7 +80,7 @@ __global__ __launch_bounds__(default_block_size) void generic_kernel_2d( template void run_kernel(std::shared_ptr exec, KernelFunction fn, - size_type size, KernelArgs &&... args) + size_type size, KernelArgs&&... args) { gko::hip::device_guard guard{exec->get_device_id()}; constexpr auto block_size = default_block_size; @@ -91,7 +91,7 @@ void run_kernel(std::shared_ptr exec, KernelFunction fn, template void run_kernel(std::shared_ptr exec, KernelFunction fn, - dim<2> size, KernelArgs &&... args) + dim<2> size, KernelArgs&&... args) { gko::hip::device_guard guard{exec->get_device_id()}; constexpr auto block_size = default_block_size; diff --git a/hip/base/kernel_launch_solver.hip.hpp b/hip/base/kernel_launch_solver.hip.hpp index ed8c610d5b2..a8335851a0e 100644 --- a/hip/base/kernel_launch_solver.hip.hpp +++ b/hip/base/kernel_launch_solver.hip.hpp @@ -63,7 +63,7 @@ __global__ __launch_bounds__(default_block_size) void generic_kernel_2d_solver( template void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, - KernelArgs &&... args) + KernelArgs&&... 
args) { gko::hip::device_guard guard{exec->get_device_id()}; constexpr auto block_size = kernels::hip::default_block_size; diff --git a/hip/base/pointer_mode_guard.hip.hpp b/hip/base/pointer_mode_guard.hip.hpp index f231909e6a5..a793d3be6ed 100644 --- a/hip/base/pointer_mode_guard.hip.hpp +++ b/hip/base/pointer_mode_guard.hip.hpp @@ -62,7 +62,7 @@ namespace hipblas { */ class pointer_mode_guard { public: - pointer_mode_guard(hipblasContext *handle) + pointer_mode_guard(hipblasContext* handle) { l_handle = handle; GKO_ASSERT_NO_HIPBLAS_ERRORS( @@ -70,13 +70,13 @@ class pointer_mode_guard { HIPBLAS_POINTER_MODE_HOST)); } - pointer_mode_guard(pointer_mode_guard &other) = delete; + pointer_mode_guard(pointer_mode_guard& other) = delete; - pointer_mode_guard &operator=(const pointer_mode_guard &other) = delete; + pointer_mode_guard& operator=(const pointer_mode_guard& other) = delete; - pointer_mode_guard(pointer_mode_guard &&other) = delete; + pointer_mode_guard(pointer_mode_guard&& other) = delete; - pointer_mode_guard const &operator=(pointer_mode_guard &&other) = delete; + pointer_mode_guard const& operator=(pointer_mode_guard&& other) = delete; ~pointer_mode_guard() noexcept(false) { @@ -92,7 +92,7 @@ class pointer_mode_guard { } private: - hipblasContext *l_handle; + hipblasContext* l_handle; }; @@ -112,7 +112,7 @@ namespace hipsparse { */ class pointer_mode_guard { public: - pointer_mode_guard(hipsparseContext *handle) + pointer_mode_guard(hipsparseContext* handle) { l_handle = handle; GKO_ASSERT_NO_HIPSPARSE_ERRORS( @@ -120,13 +120,13 @@ class pointer_mode_guard { HIPSPARSE_POINTER_MODE_HOST)); } - pointer_mode_guard(pointer_mode_guard &other) = delete; + pointer_mode_guard(pointer_mode_guard& other) = delete; - pointer_mode_guard &operator=(const pointer_mode_guard &other) = delete; + pointer_mode_guard& operator=(const pointer_mode_guard& other) = delete; - pointer_mode_guard(pointer_mode_guard &&other) = delete; + pointer_mode_guard(pointer_mode_guard&& 
other) = delete; - pointer_mode_guard const &operator=(pointer_mode_guard &&other) = delete; + pointer_mode_guard const& operator=(pointer_mode_guard&& other) = delete; ~pointer_mode_guard() noexcept(false) { @@ -143,7 +143,7 @@ class pointer_mode_guard { } private: - hipsparseContext *l_handle; + hipsparseContext* l_handle; }; diff --git a/hip/base/types.hip.hpp b/hip/base/types.hip.hpp index c0c7e87e333..5eb98c1d6d5 100644 --- a/hip/base/types.hip.hpp +++ b/hip/base/types.hip.hpp @@ -61,13 +61,13 @@ struct hiplibs_type_impl { }; template -struct hiplibs_type_impl { - using type = typename hiplibs_type_impl::type *; +struct hiplibs_type_impl { + using type = typename hiplibs_type_impl::type*; }; template -struct hiplibs_type_impl { - using type = typename hiplibs_type_impl::type &; +struct hiplibs_type_impl { + using type = typename hiplibs_type_impl::type&; }; template @@ -102,13 +102,13 @@ struct hipblas_type_impl { }; template -struct hipblas_type_impl { - using type = typename hipblas_type_impl::type *; +struct hipblas_type_impl { + using type = typename hipblas_type_impl::type*; }; template -struct hipblas_type_impl { - using type = typename hipblas_type_impl::type &; +struct hipblas_type_impl { + using type = typename hipblas_type_impl::type&; }; template @@ -143,13 +143,13 @@ struct hip_type_impl { }; template -struct hip_type_impl { - using type = typename hip_type_impl::type *; +struct hip_type_impl { + using type = typename hip_type_impl::type*; }; template -struct hip_type_impl { - using type = typename hip_type_impl::type &; +struct hip_type_impl { + using type = typename hip_type_impl::type&; }; template @@ -265,7 +265,7 @@ inline std::enable_if_t< !std::is_pointer::value && !std::is_reference::value, hip_type> as_hip_type(T val) { - return *reinterpret_cast *>(&val); + return *reinterpret_cast*>(&val); } diff --git a/hip/components/absolute_array.hip.cpp b/hip/components/absolute_array.hip.cpp index f5e4e9637a0..ad0e61d165a 100644 --- 
a/hip/components/absolute_array.hip.cpp +++ b/hip/components/absolute_array.hip.cpp @@ -54,7 +54,7 @@ constexpr int default_block_size = 512; template void inplace_absolute_array(std::shared_ptr exec, - ValueType *data, size_type n) + ValueType* data, size_type n) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); @@ -67,8 +67,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); template void outplace_absolute_array(std::shared_ptr exec, - const ValueType *in, size_type n, - remove_complex *out) + const ValueType* in, size_type n, + remove_complex* out) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); diff --git a/hip/components/atomic.hip.hpp b/hip/components/atomic.hip.hpp index a6508e87f8c..19b55bd9c20 100644 --- a/hip/components/atomic.hip.hpp +++ b/hip/components/atomic.hip.hpp @@ -55,12 +55,12 @@ namespace hip { * @note It is not 'real' complex atomic add operation */ __forceinline__ __device__ thrust::complex atomic_add( - thrust::complex *__restrict__ address, thrust::complex val) + thrust::complex* __restrict__ address, thrust::complex val) { - hipComplex *addr = reinterpret_cast(address); + hipComplex* addr = reinterpret_cast(address); // Separate to real part and imag part - auto real = atomic_add(static_cast(&(addr->x)), val.real()); - auto imag = atomic_add(static_cast(&(addr->y)), val.imag()); + auto real = atomic_add(static_cast(&(addr->x)), val.real()); + auto imag = atomic_add(static_cast(&(addr->y)), val.imag()); return {real, imag}; } @@ -71,12 +71,12 @@ __forceinline__ __device__ thrust::complex atomic_add( * @note It is not 'real' complex atomic add operation */ __forceinline__ __device__ thrust::complex atomic_add( - thrust::complex *__restrict__ address, thrust::complex val) + thrust::complex* __restrict__ address, thrust::complex val) { - hipDoubleComplex *addr = reinterpret_cast(address); + 
hipDoubleComplex* addr = reinterpret_cast(address); // Separate to real part and imag part - auto real = atomic_add(static_cast(&(addr->x)), val.real()); - auto imag = atomic_add(static_cast(&(addr->y)), val.imag()); + auto real = atomic_add(static_cast(&(addr->x)), val.real()); + auto imag = atomic_add(static_cast(&(addr->y)), val.imag()); return {real, imag}; } diff --git a/hip/components/cooperative_groups.hip.hpp b/hip/components/cooperative_groups.hip.hpp index 10c1217da11..6f8e25d2339 100644 --- a/hip/components/cooperative_groups.hip.hpp +++ b/hip/components/cooperative_groups.hip.hpp @@ -331,12 +331,12 @@ class enable_extended_shuffle : public Group { #define GKO_ENABLE_SHUFFLE_OPERATION(_name, SelectorType) \ template \ - __device__ __forceinline__ ValueType _name(const ValueType &var, \ + __device__ __forceinline__ ValueType _name(const ValueType& var, \ SelectorType selector) const \ { \ return shuffle_impl( \ [this](uint32 v, SelectorType s) { \ - return static_cast(this)->_name(v, s); \ + return static_cast(this)->_name(v, s); \ }, \ var, selector); \ } @@ -359,8 +359,8 @@ class enable_extended_shuffle : public Group { "Unable to shuffle sizes which are not 4-byte multiples"); constexpr auto value_size = sizeof(ValueType) / sizeof(uint32); ValueType result; - auto var_array = reinterpret_cast(&var); - auto result_array = reinterpret_cast(&result); + auto var_array = reinterpret_cast(&var); + auto result_array = reinterpret_cast(&result); #pragma unroll for (std::size_t i = 0; i < value_size; ++i) { result_array[i] = intrinsic_shuffle(var_array[i], selector); @@ -389,7 +389,7 @@ __device__ __forceinline__ std::enable_if_t<(Size <= kernels::hip::config::warp_size) && (Size > 0) && (kernels::hip::config::warp_size % Size == 0), thread_block_tile> - tiled_partition(const Group &) + tiled_partition(const Group&) { return thread_block_tile(); } diff --git a/hip/components/fill_array.hip.cpp b/hip/components/fill_array.hip.cpp index 590a0953b1d..7f026a3271d 
100644 --- a/hip/components/fill_array.hip.cpp +++ b/hip/components/fill_array.hip.cpp @@ -55,7 +55,7 @@ constexpr int default_block_size = 512; template -void fill_array(std::shared_ptr exec, ValueType *array, +void fill_array(std::shared_ptr exec, ValueType* array, size_type n, ValueType val) { const dim3 block_size(default_block_size, 1, 1); @@ -69,7 +69,7 @@ GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); template void fill_seq_array(std::shared_ptr exec, - ValueType *array, size_type n) + ValueType* array, size_type n) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); diff --git a/hip/components/format_conversion.hip.hpp b/hip/components/format_conversion.hip.hpp index c0c77869e3f..70fa01afcb2 100644 --- a/hip/components/format_conversion.hip.hpp +++ b/hip/components/format_conversion.hip.hpp @@ -65,8 +65,8 @@ namespace kernel { template __global__ void count_nnz_per_row(size_type num_rows, size_type max_nnz_per_row, size_type stride, - const ValueType *__restrict__ values, - IndexType *__restrict__ result); + const ValueType* __restrict__ values, + IndexType* __restrict__ result); } // namespace kernel @@ -83,9 +83,9 @@ namespace kernel { * It converts the row index of Coo to the row pointer of Csr. 
*/ template -__global__ void convert_row_idxs_to_ptrs(const IndexType *__restrict__ idxs, +__global__ void convert_row_idxs_to_ptrs(const IndexType* __restrict__ idxs, size_type num_nonzeros, - IndexType *__restrict__ ptrs, + IndexType* __restrict__ ptrs, size_type length); diff --git a/hip/components/prefix_sum.hip.cpp b/hip/components/prefix_sum.hip.cpp index 9302fc07b9a..739a5329389 100644 --- a/hip/components/prefix_sum.hip.cpp +++ b/hip/components/prefix_sum.hip.cpp @@ -46,7 +46,7 @@ constexpr int prefix_sum_block_size = 512; template -void prefix_sum(std::shared_ptr exec, IndexType *counts, +void prefix_sum(std::shared_ptr exec, IndexType* counts, size_type num_entries) { // prefix_sum should only be performed on a valid array diff --git a/hip/components/reduction.hip.hpp b/hip/components/reduction.hip.hpp index f1c9c8d965f..87d6b518123 100644 --- a/hip/components/reduction.hip.hpp +++ b/hip/components/reduction.hip.hpp @@ -72,7 +72,7 @@ constexpr int default_block_size = 512; */ template __host__ ValueType reduce_add_array(std::shared_ptr exec, - size_type size, const ValueType *source) + size_type size, const ValueType* source) { auto block_results_val = source; size_type grid_dim = size; diff --git a/hip/factorization/factorization_kernels.hip.cpp b/hip/factorization/factorization_kernels.hip.cpp index ec4ab4806ee..fa563f91018 100644 --- a/hip/factorization/factorization_kernels.hip.cpp +++ b/hip/factorization/factorization_kernels.hip.cpp @@ -67,7 +67,7 @@ constexpr int default_block_size{512}; template void add_diagonal_elements(std::shared_ptr exec, - matrix::Csr *mtx, + matrix::Csr* mtx, bool is_sorted) { // TODO: Runtime can be optimized by choosing a appropriate size for the @@ -150,8 +150,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l_u( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs, IndexType *u_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs, 
IndexType* u_row_ptrs) { const size_type num_rows{system_matrix->get_size()[0]}; @@ -177,9 +177,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l_u(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, - matrix::Csr *csr_u) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, + matrix::Csr* csr_u) { const size_type num_rows{system_matrix->get_size()[0]}; const dim3 block_size{default_block_size, 1, 1}; @@ -205,8 +205,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs) { const size_type num_rows{system_matrix->get_size()[0]}; @@ -231,8 +231,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, bool diag_sqrt) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, bool diag_sqrt) { const size_type num_rows{system_matrix->get_size()[0]}; const dim3 block_size{default_block_size, 1, 1}; diff --git a/hip/factorization/ic_kernels.hip.cpp b/hip/factorization/ic_kernels.hip.cpp index 4510a18781c..6a0381728ea 100644 --- a/hip/factorization/ic_kernels.hip.cpp +++ b/hip/factorization/ic_kernels.hip.cpp @@ -56,7 +56,7 @@ namespace ic_factorization { template void compute(std::shared_ptr exec, - matrix::Csr *m) + matrix::Csr* m) { const auto id = exec->get_device_id(); auto handle = exec->get_hipsparse_handle(); diff --git a/hip/factorization/ilu_kernels.hip.cpp b/hip/factorization/ilu_kernels.hip.cpp index e22a8140ea9..de0255fbaaa 100644 --- a/hip/factorization/ilu_kernels.hip.cpp +++ b/hip/factorization/ilu_kernels.hip.cpp @@ -56,7 +56,7 @@ namespace ilu_factorization { template void compute_lu(std::shared_ptr exec, - matrix::Csr *m) + matrix::Csr* m) { const auto id = exec->get_device_id(); auto handle = 
exec->get_hipsparse_handle(); diff --git a/hip/factorization/par_ic_kernels.hip.cpp b/hip/factorization/par_ic_kernels.hip.cpp index 17e556046e2..c0e9617453d 100644 --- a/hip/factorization/par_ic_kernels.hip.cpp +++ b/hip/factorization/par_ic_kernels.hip.cpp @@ -67,7 +67,7 @@ using compiled_kernels = template void init_factor(std::shared_ptr exec, - matrix::Csr *l) + matrix::Csr* l) { auto num_rows = l->get_size()[0]; auto num_blocks = ceildiv(num_rows, default_block_size); @@ -85,8 +85,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, size_type iterations, - const matrix::Coo *a_lower, - matrix::Csr *l) + const matrix::Coo* a_lower, + matrix::Csr* l) { auto nnz = l->get_num_stored_elements(); auto num_blocks = ceildiv(nnz, default_block_size); diff --git a/hip/factorization/par_ict_kernels.hip.cpp b/hip/factorization/par_ict_kernels.hip.cpp index a67c29d10a7..4e8aea41c46 100644 --- a/hip/factorization/par_ict_kernels.hip.cpp +++ b/hip/factorization/par_ict_kernels.hip.cpp @@ -86,10 +86,10 @@ namespace { template void add_candidates(syn::value_list, std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) { auto num_rows = static_cast(llh->get_size()[0]); auto subwarps_per_block = default_block_size / subwarp_size; @@ -138,9 +138,9 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_add_candidates, add_candidates); template void compute_factor(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo) { auto total_nnz = static_cast(l->get_num_stored_elements()); auto block_size = default_block_size / subwarp_size; @@ -163,10 +163,10 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_factor, compute_factor); template void 
add_candidates(std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) { auto num_rows = a->get_size()[0]; auto total_nnz = @@ -187,9 +187,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo) { auto num_rows = a->get_size()[0]; auto total_nnz = 2 * l->get_num_stored_elements(); diff --git a/hip/factorization/par_ilu_kernels.hip.cpp b/hip/factorization/par_ilu_kernels.hip.cpp index b414874930f..fb3f0661004 100644 --- a/hip/factorization/par_ilu_kernels.hip.cpp +++ b/hip/factorization/par_ilu_kernels.hip.cpp @@ -65,9 +65,9 @@ constexpr int default_block_size{512}; template void compute_l_u_factors(std::shared_ptr exec, size_type iterations, - const matrix::Coo *system_matrix, - matrix::Csr *l_factor, - matrix::Csr *u_factor) + const matrix::Coo* system_matrix, + matrix::Csr* l_factor, + matrix::Csr* u_factor) { iterations = (iterations == 0) ? 
10 : iterations; const auto num_elements = system_matrix->get_num_stored_elements(); diff --git a/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp b/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp index 319fdf4210c..95bae477416 100644 --- a/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp +++ b/hip/factorization/par_ilut_approx_filter_kernel.hip.cpp @@ -86,11 +86,11 @@ using compiled_kernels = template void threshold_filter_approx(syn::value_list, std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array *tmp, - remove_complex *threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo) + const matrix::Csr* m, + IndexType rank, Array* tmp, + remove_complex* threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) { auto values = m->get_const_values(); IndexType size = m->get_num_stored_elements(); @@ -111,14 +111,14 @@ void threshold_filter_approx(syn::value_list, tmp_size_totals + tmp_size_partials + tmp_size_oracles + tmp_size_tree; tmp->resize_and_reset(tmp_size); - auto total_counts = reinterpret_cast(tmp->get_data()); + auto total_counts = reinterpret_cast(tmp->get_data()); auto partial_counts = - reinterpret_cast(tmp->get_data() + tmp_size_totals); - auto oracles = reinterpret_cast( + reinterpret_cast(tmp->get_data() + tmp_size_totals); + auto oracles = reinterpret_cast( tmp->get_data() + tmp_size_totals + tmp_size_partials); auto tree = - reinterpret_cast(tmp->get_data() + tmp_size_totals + - tmp_size_partials + tmp_size_oracles); + reinterpret_cast(tmp->get_data() + tmp_size_totals + + tmp_size_partials + tmp_size_oracles); sampleselect_count(exec, values, size, tree, oracles, partial_counts, total_counts); @@ -157,7 +157,7 @@ void threshold_filter_approx(syn::value_list, builder.get_value_array().resize_and_reset(new_nnz); auto new_col_idxs = m_out->get_col_idxs(); auto new_vals = m_out->get_values(); - IndexType *new_row_idxs{}; + IndexType* new_row_idxs{}; if (m_out_coo) { matrix::CooBuilder coo_builder{m_out_coo}; 
coo_builder.get_row_idx_array().resize_and_reset(new_nnz); @@ -181,11 +181,11 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_threshold_filter_approx, template void threshold_filter_approx(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - remove_complex &threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo) + const matrix::Csr* m, + IndexType rank, Array& tmp, + remove_complex& threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) { auto num_rows = m->get_size()[0]; auto total_nnz = m->get_num_stored_elements(); diff --git a/hip/factorization/par_ilut_filter_kernel.hip.cpp b/hip/factorization/par_ilut_filter_kernel.hip.cpp index e8997ebeb40..c08e28934b3 100644 --- a/hip/factorization/par_ilut_filter_kernel.hip.cpp +++ b/hip/factorization/par_ilut_filter_kernel.hip.cpp @@ -84,10 +84,10 @@ namespace { template void threshold_filter(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, + const matrix::Csr* a, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, bool lower) + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool lower) { auto old_row_ptrs = a->get_const_row_ptrs(); auto old_col_idxs = a->get_const_col_idxs(); @@ -113,7 +113,7 @@ void threshold_filter(syn::value_list, builder.get_value_array().resize_and_reset(new_nnz); auto new_col_idxs = m_out->get_col_idxs(); auto new_vals = m_out->get_values(); - IndexType *new_row_idxs{}; + IndexType* new_row_idxs{}; if (m_out_coo) { matrix::CooBuilder coo_builder{m_out_coo}; coo_builder.get_row_idx_array().resize_and_reset(new_nnz); @@ -138,10 +138,10 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_threshold_filter, threshold_filter); template void threshold_filter(std::shared_ptr exec, - const matrix::Csr *a, + const matrix::Csr* a, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, bool lower) + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool lower) { auto num_rows = a->get_size()[0]; auto total_nnz = 
a->get_num_stored_elements(); diff --git a/hip/factorization/par_ilut_select_common.hip.cpp b/hip/factorization/par_ilut_select_common.hip.cpp index 4e453270d78..36f338d9b55 100644 --- a/hip/factorization/par_ilut_select_common.hip.cpp +++ b/hip/factorization/par_ilut_select_common.hip.cpp @@ -66,9 +66,9 @@ namespace par_ilut_factorization { template void sampleselect_count(std::shared_ptr exec, - const ValueType *values, IndexType size, - remove_complex *tree, unsigned char *oracles, - IndexType *partial_counts, IndexType *total_counts) + const ValueType* values, IndexType size, + remove_complex* tree, unsigned char* oracles, + IndexType* partial_counts, IndexType* total_counts) { constexpr auto bucket_count = kernel::searchtree_width; auto num_threads_total = ceildiv(size, items_per_thread); @@ -93,17 +93,17 @@ void sampleselect_count(std::shared_ptr exec, #define DECLARE_SSSS_COUNT(ValueType, IndexType) \ void sampleselect_count(std::shared_ptr exec, \ - const ValueType *values, IndexType size, \ - remove_complex *tree, \ - unsigned char *oracles, IndexType *partial_counts, \ - IndexType *total_counts) + const ValueType* values, IndexType size, \ + remove_complex* tree, \ + unsigned char* oracles, IndexType* partial_counts, \ + IndexType* total_counts) GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(DECLARE_SSSS_COUNT); template sampleselect_bucket sampleselect_find_bucket( - std::shared_ptr exec, IndexType *prefix_sum, + std::shared_ptr exec, IndexType* prefix_sum, IndexType rank) { hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::find_bucket), dim3(1), @@ -116,7 +116,7 @@ sampleselect_bucket sampleselect_find_bucket( #define DECLARE_SSSS_FIND_BUCKET(IndexType) \ sampleselect_bucket sampleselect_find_bucket( \ - std::shared_ptr exec, IndexType *prefix_sum, \ + std::shared_ptr exec, IndexType* prefix_sum, \ IndexType rank) GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(DECLARE_SSSS_FIND_BUCKET); diff --git a/hip/factorization/par_ilut_select_common.hip.hpp 
b/hip/factorization/par_ilut_select_common.hip.hpp index 38f653abf6e..d86c620baa7 100644 --- a/hip/factorization/par_ilut_select_common.hip.hpp +++ b/hip/factorization/par_ilut_select_common.hip.hpp @@ -51,9 +51,9 @@ constexpr int items_per_thread = 16; template void sampleselect_count(std::shared_ptr exec, - const ValueType *values, IndexType size, - remove_complex *tree, unsigned char *oracles, - IndexType *partial_counts, IndexType *total_counts); + const ValueType* values, IndexType size, + remove_complex* tree, unsigned char* oracles, + IndexType* partial_counts, IndexType* total_counts); template @@ -66,7 +66,7 @@ struct sampleselect_bucket { template sampleselect_bucket sampleselect_find_bucket( - std::shared_ptr exec, IndexType *prefix_sum, + std::shared_ptr exec, IndexType* prefix_sum, IndexType rank); diff --git a/hip/factorization/par_ilut_select_kernel.hip.cpp b/hip/factorization/par_ilut_select_kernel.hip.cpp index 00adc9fcba1..235e94bb7e9 100644 --- a/hip/factorization/par_ilut_select_kernel.hip.cpp +++ b/hip/factorization/par_ilut_select_kernel.hip.cpp @@ -70,10 +70,10 @@ namespace par_ilut_factorization { template -void sampleselect_filter(const ValueType *values, IndexType size, - const unsigned char *oracles, - const IndexType *partial_counts, IndexType bucket, - remove_complex *out) +void sampleselect_filter(const ValueType* values, IndexType size, + const unsigned char* oracles, + const IndexType* partial_counts, IndexType bucket, + remove_complex* out) { auto num_threads_total = ceildiv(size, items_per_thread); auto num_blocks = @@ -87,10 +87,10 @@ void sampleselect_filter(const ValueType *values, IndexType size, template void threshold_select(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp1, - Array> &tmp2, - remove_complex &threshold) + const matrix::Csr* m, + IndexType rank, Array& tmp1, + Array>& tmp2, + remove_complex& threshold) { auto values = m->get_const_values(); IndexType size = 
m->get_num_stored_elements(); @@ -114,14 +114,14 @@ void threshold_select(std::shared_ptr exec, tmp1.resize_and_reset(tmp_size); tmp2.resize_and_reset(tmp_size_vals); - auto total_counts = reinterpret_cast(tmp1.get_data()); + auto total_counts = reinterpret_cast(tmp1.get_data()); auto partial_counts = - reinterpret_cast(tmp1.get_data() + tmp_size_totals); - auto oracles = reinterpret_cast( + reinterpret_cast(tmp1.get_data() + tmp_size_totals); + auto oracles = reinterpret_cast( tmp1.get_data() + tmp_size_totals + tmp_size_partials); auto tree = - reinterpret_cast(tmp1.get_data() + tmp_size_totals + - tmp_size_partials + tmp_size_oracles); + reinterpret_cast(tmp1.get_data() + tmp_size_totals + + tmp_size_partials + tmp_size_oracles); sampleselect_count(exec, values, size, tree, oracles, partial_counts, total_counts); @@ -144,7 +144,7 @@ void threshold_select(std::shared_ptr exec, int step{}; while (bucket.size > kernel::basecase_size) { std::swap(tmp21, tmp22); - const auto *tmp_in = tmp21; + const auto* tmp_in = tmp21; auto tmp_out = tmp22; sampleselect_count(exec, tmp_in, bucket.size, tree, oracles, @@ -172,7 +172,7 @@ void threshold_select(std::shared_ptr exec, } // base case - auto out_ptr = reinterpret_cast(tmp1.get_data()); + auto out_ptr = reinterpret_cast(tmp1.get_data()); hipLaunchKernelGGL(HIP_KERNEL_NAME(kernel::basecase_select), dim3(1), dim3(kernel::basecase_block_size), 0, 0, tmp22, bucket.size, rank, out_ptr); diff --git a/hip/factorization/par_ilut_spgeam_kernel.hip.cpp b/hip/factorization/par_ilut_spgeam_kernel.hip.cpp index d7815de1d2a..a6dc5e73328 100644 --- a/hip/factorization/par_ilut_spgeam_kernel.hip.cpp +++ b/hip/factorization/par_ilut_spgeam_kernel.hip.cpp @@ -85,12 +85,12 @@ namespace { template void add_candidates(syn::value_list, std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, 
+ const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) { auto num_rows = static_cast(lu->get_size()[0]); auto subwarps_per_block = default_block_size / subwarp_size; @@ -154,12 +154,12 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_add_candidates, add_candidates); template void add_candidates(std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, + const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) { auto num_rows = a->get_size()[0]; auto total_nnz = diff --git a/hip/factorization/par_ilut_sweep_kernel.hip.cpp b/hip/factorization/par_ilut_sweep_kernel.hip.cpp index b00a225a6b4..780fd655edb 100644 --- a/hip/factorization/par_ilut_sweep_kernel.hip.cpp +++ b/hip/factorization/par_ilut_sweep_kernel.hip.cpp @@ -85,12 +85,12 @@ namespace { template void compute_l_u_factors(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo, - matrix::Csr *u, - const matrix::Coo *u_coo, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo, + matrix::Csr* u, + const matrix::Coo* u_coo, + matrix::Csr* u_csc) { auto total_nnz = static_cast(l->get_num_stored_elements() + u->get_num_stored_elements()); @@ -119,12 +119,12 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_compute_l_u_factors, template void compute_l_u_factors(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *l_coo, - matrix::Csr *u, - const matrix::Coo *u_coo, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo* l_coo, + matrix::Csr* u, + const matrix::Coo* u_coo, + matrix::Csr* u_csc) { auto num_rows = a->get_size()[0]; auto total_nnz = diff --git a/hip/matrix/coo_kernels.hip.cpp b/hip/matrix/coo_kernels.hip.cpp index b1318230c7a..82997bcb80b 
100644 --- a/hip/matrix/coo_kernels.hip.cpp +++ b/hip/matrix/coo_kernels.hip.cpp @@ -81,8 +81,8 @@ constexpr int spmv_block_size = warps_in_block * config::warp_size; template void spmv(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -93,11 +93,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -109,8 +109,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { const auto nnz = a->get_num_stored_elements(); const auto b_ncols = b->get_size()[1]; @@ -148,10 +148,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto nnz = a->get_num_stored_elements(); const auto nwarps = host_kernel::calculate_nwarps(exec, nnz); @@ -192,8 +192,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_row_idxs_to_ptrs(std::shared_ptr exec, - const IndexType *idxs, size_type num_nonzeros, - IndexType *ptrs, size_type length) + const IndexType* idxs, size_type num_nonzeros, + IndexType* ptrs, size_type length) { const auto grid_dim = 
ceildiv(num_nonzeros, default_block_size); @@ -205,8 +205,8 @@ void convert_row_idxs_to_ptrs(std::shared_ptr exec, template void convert_to_csr(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Csr *result) + const matrix::Coo* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -225,8 +225,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Dense *result) + const matrix::Coo* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.hip.cpp index b2a712e1924..be28c3a471b 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.hip.cpp @@ -108,11 +108,11 @@ namespace host_kernel { template void merge_path_spmv(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Csr* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { const IndexType total = a->get_size()[0] + a->get_num_stored_elements(); const IndexType grid_num = @@ -232,11 +232,11 @@ int compute_items_per_thread(std::shared_ptr exec) template void classical_spmv(syn::value_list, std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Csr* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { const auto nwarps = exec->get_num_warps_per_sm() * exec->get_num_multiprocessor() * classical_overweight; @@ -278,8 +278,8 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template void 
spmv(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Csr* a, + const matrix::Dense* b, matrix::Dense* c) { if (a->get_strategy()->get_name() == "load_balance") { components::fill_array(exec, c->get_values(), @@ -364,11 +364,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { if (a->get_strategy()->get_name() == "load_balance") { dense::scale(exec, beta, c); @@ -457,9 +457,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgemm(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Csr* a, + const matrix::Csr* b, + matrix::Csr* c) { if (hipsparse::is_supported::value) { auto handle = exec->get_hipsparse_handle(); @@ -479,16 +479,16 @@ void spgemm(std::shared_ptr exec, auto b_vals = b->get_const_values(); auto b_row_ptrs = b->get_const_row_ptrs(); auto b_col_idxs = b->get_const_col_idxs(); - auto null_value = static_cast(nullptr); - auto null_index = static_cast(nullptr); + auto null_value = static_cast(nullptr); + auto null_index = static_cast(nullptr); auto zero_nnz = IndexType{}; auto m = static_cast(a->get_size()[0]); auto n = static_cast(b->get_size()[1]); auto k = static_cast(a->get_size()[1]); auto c_row_ptrs = c->get_row_ptrs(); matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); // allocate buffer size_type buffer_size{}; @@ -535,11 +535,11 @@ namespace { template void spgeam(syn::value_list, - 
std::shared_ptr exec, const ValueType *alpha, - const IndexType *a_row_ptrs, const IndexType *a_col_idxs, - const ValueType *a_vals, const ValueType *beta, - const IndexType *b_row_ptrs, const IndexType *b_col_idxs, - const ValueType *b_vals, matrix::Csr *c) + std::shared_ptr exec, const ValueType* alpha, + const IndexType* a_row_ptrs, const IndexType* a_col_idxs, + const ValueType* a_vals, const ValueType* beta, + const IndexType* b_row_ptrs, const IndexType* b_col_idxs, + const ValueType* b_vals, matrix::Csr* c) { auto m = static_cast(c->get_size()[0]); auto c_row_ptrs = c->get_row_ptrs(); @@ -577,12 +577,12 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_spgeam, spgeam); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Csr *b, - const matrix::Dense *beta, - const matrix::Csr *d, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Csr* b, + const matrix::Dense* beta, + const matrix::Csr* d, + matrix::Csr* c) { if (hipsparse::is_supported::value) { auto handle = exec->get_hipsparse_handle(); @@ -604,8 +604,8 @@ void advanced_spgemm(std::shared_ptr exec, auto d_vals = d->get_const_values(); auto d_row_ptrs = d->get_const_row_ptrs(); auto d_col_idxs = d->get_const_col_idxs(); - auto null_value = static_cast(nullptr); - auto null_index = static_cast(nullptr); + auto null_value = static_cast(nullptr); + auto null_index = static_cast(nullptr); auto one_value = one(); auto m = static_cast(a->get_size()[0]); auto n = static_cast(b->get_size()[1]); @@ -671,11 +671,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *beta, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* beta, + const matrix::Csr* b, + matrix::Csr* c) { auto total_nnz = a->get_num_stored_elements() + 
b->get_num_stored_elements(); @@ -698,8 +698,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) + const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { const auto grid_dim = ceildiv(num_rows, default_block_size); @@ -711,8 +711,8 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, template void convert_to_coo(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Coo *result) + const matrix::Csr* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; @@ -728,8 +728,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Dense *result) + const matrix::Csr* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -759,8 +759,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Sellp *result) + const matrix::Csr* source, + matrix::Sellp* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -821,8 +821,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Ell *result) + const matrix::Csr* source, + matrix::Ell* result) { const auto source_values = source->get_const_values(); const auto source_row_ptrs = source->get_const_row_ptrs(); @@ -859,8 +859,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result, size_type stride_factor, + const matrix::Csr* source, + size_type* result, size_type stride_factor, size_type slice_size) { const auto num_rows = source->get_size()[0]; @@ -913,8 +913,8 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { if (hipsparse::is_supported::value) { hipsparseAction_t copyValues = HIPSPARSE_ACTION_NUMERIC; @@ -936,8 +936,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { if (hipsparse::is_supported::value) { const dim3 block_size(default_block_size, 1, 1); @@ -968,9 +968,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -994,9 +994,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void row_permute(std::shared_ptr exec, const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) +void row_permute(std::shared_ptr exec, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -1021,9 +1021,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto num_rows = orig->get_size()[0]; auto count_num_blocks = ceildiv(num_rows, default_block_size); @@ -1048,8 +1048,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result) + const 
matrix::Csr* source, + size_type* result) { const auto num_rows = source->get_size()[0]; @@ -1084,8 +1084,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Hybrid *result) + const matrix::Csr* source, + matrix::Hybrid* result) { auto ell_val = result->get_ell_values(); auto ell_col = result->get_ell_col_idxs(); @@ -1130,8 +1130,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Csr *source, - Array *result) + const matrix::Csr* source, + Array* result) { const auto num_rows = source->get_size()[0]; auto row_ptrs = source->get_const_row_ptrs(); @@ -1148,7 +1148,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::Csr *to_sort) + matrix::Csr* to_sort) { if (hipsparse::is_supported::value) { auto handle = exec->get_hipsparse_handle(); @@ -1197,7 +1197,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr *to_check, bool *is_sorted) + const matrix::Csr* to_check, bool* is_sorted) { *is_sorted = true; auto cpu_array = Array::view(exec->get_master(), 1, is_sorted); @@ -1218,8 +1218,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Diagonal *diag) + const matrix::Csr* orig, + matrix::Diagonal* diag) { const auto nnz = orig->get_num_stored_elements(); const auto diag_size = diag->get_size()[0]; diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp index 02ff05b5b3f..56ed5c327b9 100644 --- a/hip/matrix/dense_kernels.hip.cpp +++ b/hip/matrix/dense_kernels.hip.cpp @@ -75,9 +75,9 @@ constexpr int default_block_size = 512; template void simple_apply(std::shared_ptr exec, - const matrix::Dense *a, - const matrix::Dense *b, - matrix::Dense *c) + 
const matrix::Dense* a, + const matrix::Dense* b, + matrix::Dense* c) { if (hipblas::is_supported::value) { auto handle = exec->get_hipblas_handle(); @@ -101,9 +101,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *a, const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Dense* a, const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* c) { if (hipblas::is_supported::value) { hipblas::gemm(exec->get_hipblas_handle(), HIPBLAS_OP_N, HIPBLAS_OP_N, @@ -122,9 +122,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { if (hipblas::is_supported::value) { // TODO: write a custom kernel which does this more efficiently @@ -169,9 +169,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { if (hipblas::is_supported::value) { // TODO: write a custom kernel which does this more efficiently @@ -216,8 +216,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::Dense *x, - matrix::Dense> *result) + const matrix::Dense* x, + matrix::Dense>* result) { if (hipblas::is_supported::value) { for (size_type col = 0; col < x->get_size()[1]; ++col) { @@ -260,8 +260,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Coo *result) + 
const matrix::Dense* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -293,8 +293,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Csr *result) + const matrix::Dense* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -329,8 +329,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Ell *result) + const matrix::Dense* source, + matrix::Ell* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -356,8 +356,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Hybrid *result) + const matrix::Dense* source, + matrix::Hybrid* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -366,8 +366,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Sellp *result) + const matrix::Dense* source, + matrix::Sellp* result) { const auto stride = source->get_stride(); const auto num_rows = result->get_size()[0]; @@ -417,8 +417,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::SparsityCsr *result) + const matrix::Dense* source, + matrix::SparsityCsr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -427,7 +427,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Dense *source, size_type *result) + const matrix::Dense* source, size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ 
-442,8 +442,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result) + const matrix::Dense* source, + size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ -479,8 +479,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense *source, - Array *result) + const matrix::Dense* source, + Array* result) { const dim3 block_size(default_block_size, 1, 1); auto rows_per_block = ceildiv(default_block_size, config::warp_size); @@ -501,8 +501,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result, size_type stride_factor, + const matrix::Dense* source, + size_type* result, size_type stride_factor, size_type slice_size) { const auto num_rows = source->get_size()[0]; @@ -554,8 +554,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { if (hipblas::is_supported::value) { auto handle = exec->get_hipblas_handle(); @@ -579,8 +579,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { if (hipblas::is_supported::value) { auto handle = exec->get_hipblas_handle(); diff --git a/hip/matrix/diagonal_kernels.hip.cpp b/hip/matrix/diagonal_kernels.hip.cpp index a75d4aa6669..570f4388d83 100644 --- a/hip/matrix/diagonal_kernels.hip.cpp +++ b/hip/matrix/diagonal_kernels.hip.cpp @@ -65,9 +65,9 @@ constexpr int default_block_size = 512; template void apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const 
matrix::Csr *b, - matrix::Csr *c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { const auto num_rows = b->get_size()[0]; const auto diag_values = a->get_const_values(); diff --git a/hip/matrix/ell_kernels.hip.cpp b/hip/matrix/ell_kernels.hip.cpp index 27ced862d93..a65fc54dbea 100644 --- a/hip/matrix/ell_kernels.hip.cpp +++ b/hip/matrix/ell_kernels.hip.cpp @@ -115,7 +115,7 @@ namespace { template GKO_INLINE auto as_hip_accessor( - const acc::range> &acc) + const acc::range>& acc) { return acc::range< acc::reduced_row_major, hip_type>>( @@ -128,11 +128,11 @@ GKO_INLINE auto as_hip_accessor( template void abstract_spmv(syn::value_list, int num_worker_per_row, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c, - const matrix::Dense *alpha = nullptr, - const matrix::Dense *beta = nullptr) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c, + const matrix::Dense* alpha = nullptr, + const matrix::Dense* beta = nullptr) { using a_accessor = gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; @@ -187,7 +187,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_abstract_spmv, abstract_spmv); template std::array compute_thread_worker_and_atomicity( std::shared_ptr exec, - const matrix::Ell *a) + const matrix::Ell* a) { int num_thread_per_worker = 1; int atomic = 0; @@ -231,9 +231,9 @@ std::array compute_thread_worker_and_atomicity( template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -265,11 +265,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* 
alpha, + const matrix::Ell* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { const auto data = compute_thread_worker_and_atomicity(exec, a); const int num_thread_per_worker = std::get<0>(data); @@ -298,8 +298,8 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Dense *result) + const matrix::Ell* source, + matrix::Dense* result) { const auto num_rows = result->get_size()[0]; const auto num_cols = result->get_size()[1]; @@ -330,8 +330,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Csr *result) + const matrix::Ell* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -370,8 +370,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Ell *source, - size_type *result) + const matrix::Ell* source, + size_type* result) { const auto num_rows = source->get_size()[0]; auto nnz_per_row = Array(exec, num_rows); @@ -387,8 +387,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Ell *source, - Array *result) + const matrix::Ell* source, + Array* result) { const auto num_rows = source->get_size()[0]; const auto max_nnz_per_row = source->get_num_stored_elements_per_row(); @@ -410,8 +410,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Ell *orig, - matrix::Diagonal *diag) + const matrix::Ell* orig, + matrix::Diagonal* diag) { const auto max_nnz_per_row = orig->get_num_stored_elements_per_row(); const auto orig_stride = orig->get_stride(); diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.hip.cpp index 13ca94ad5f7..484403be6c7 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ 
b/hip/matrix/fbcsr_kernels.hip.cpp @@ -62,20 +62,20 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, - const matrix::Fbcsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* a, + const matrix::Dense* b, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Fbcsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* alpha, + const matrix::Fbcsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -83,8 +83,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Fbcsr *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CONVERT_TO_DENSE_KERNEL); @@ -92,8 +92,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(const std::shared_ptr exec, - const matrix::Fbcsr *const source, - matrix::Csr *const result) + const matrix::Fbcsr* const source, + matrix::Csr* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -102,8 +102,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Fbcsr *trans) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* orig, + matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); @@ -111,8 +111,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Fbcsr *orig, - 
matrix::Fbcsr *trans) + const matrix::Fbcsr* orig, + matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -122,8 +122,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - size_type *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + size_type* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -132,8 +132,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr exec, - const matrix::Fbcsr *source, - Array *result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* source, + Array* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -142,8 +142,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* to_check, + bool* is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); @@ -151,7 +151,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(const std::shared_ptr exec, - matrix::Fbcsr *const to_sort) + matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -160,8 +160,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Fbcsr *orig, - matrix::Diagonal *diag) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* orig, + matrix::Diagonal* diag) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/hip/matrix/hybrid_kernels.hip.cpp b/hip/matrix/hybrid_kernels.hip.cpp index 1d7f46ea743..747445d73fc 100644 
--- a/hip/matrix/hybrid_kernels.hip.cpp +++ b/hip/matrix/hybrid_kernels.hip.cpp @@ -74,8 +74,8 @@ constexpr int warps_in_block = 4; template void convert_to_dense(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Dense *result) GKO_NOT_IMPLEMENTED; + const matrix::Hybrid* source, + matrix::Dense* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_HYBRID_CONVERT_TO_DENSE_KERNEL); @@ -83,8 +83,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Csr *result) + const matrix::Hybrid* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; auto coo_offset = Array(exec, num_rows + 1); @@ -154,8 +154,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Hybrid *source, - size_type *result) + const matrix::Hybrid* source, + size_type* result) { size_type ell_nnz = 0; size_type coo_nnz = 0; diff --git a/hip/matrix/sellp_kernels.hip.cpp b/hip/matrix/sellp_kernels.hip.cpp index 52f4f38e798..ff5a81aabf0 100644 --- a/hip/matrix/sellp_kernels.hip.cpp +++ b/hip/matrix/sellp_kernels.hip.cpp @@ -70,8 +70,8 @@ constexpr int default_block_size = 512; template void spmv(std::shared_ptr exec, - const matrix::Sellp *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Sellp* a, + const matrix::Dense* b, matrix::Dense* c) { const dim3 blockSize(matrix::default_slice_size); const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), @@ -90,11 +90,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Sellp *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Sellp* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { 
const dim3 blockSize(matrix::default_slice_size); const dim3 gridSize(ceildiv(a->get_size()[0], matrix::default_slice_size), @@ -116,8 +116,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Dense *result) + const matrix::Sellp* source, + matrix::Dense* result) { const auto num_rows = source->get_size()[0]; const auto num_cols = source->get_size()[1]; @@ -161,8 +161,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Csr *result) + const matrix::Sellp* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; const auto slice_size = source->get_slice_size(); @@ -206,8 +206,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Sellp *source, - size_type *result) + const matrix::Sellp* source, + size_type* result) { const auto num_rows = source->get_size()[0]; @@ -238,8 +238,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Sellp *orig, - matrix::Diagonal *diag) + const matrix::Sellp* orig, + matrix::Diagonal* diag) { const auto diag_size = diag->get_size()[0]; const auto slice_size = orig->get_slice_size(); diff --git a/hip/matrix/sparsity_csr_kernels.hip.cpp b/hip/matrix/sparsity_csr_kernels.hip.cpp index 6747806eca4..1e317a91aee 100644 --- a/hip/matrix/sparsity_csr_kernels.hip.cpp +++ b/hip/matrix/sparsity_csr_kernels.hip.cpp @@ -49,9 +49,9 @@ namespace sparsity_csr { template void spmv(std::shared_ptr exec, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* a, + const matrix::Dense* b, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_SPMV_KERNEL); @@ -59,11 +59,11 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* alpha, + const matrix::SparsityCsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_ADVANCED_SPMV_KERNEL); @@ -72,8 +72,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_num_diagonal_elements( std::shared_ptr exec, - const matrix::SparsityCsr *matrix, - size_type *num_diagonal_elements) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* matrix, + size_type* num_diagonal_elements) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_COUNT_NUM_DIAGONAL_ELEMENTS_KERNEL); @@ -81,9 +81,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void remove_diagonal_elements( - std::shared_ptr exec, const IndexType *row_ptrs, - const IndexType *col_idxs, - matrix::SparsityCsr *matrix) GKO_NOT_IMPLEMENTED; + std::shared_ptr exec, const IndexType* row_ptrs, + const IndexType* col_idxs, + matrix::SparsityCsr* matrix) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_REMOVE_DIAGONAL_ELEMENTS_KERNEL); @@ -91,8 +91,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose(std::shared_ptr exec, - const matrix::SparsityCsr *orig, - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, + matrix::SparsityCsr* trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -101,7 +101,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::SparsityCsr *to_sort) + matrix::SparsityCsr* to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -111,8 +111,8 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr *to_check, - bool *is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::SparsityCsr* to_check, + bool* is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SPARSITY_CSR_IS_SORTED_BY_COLUMN_INDEX); diff --git a/hip/multigrid/amgx_pgm_kernels.hip.cpp b/hip/multigrid/amgx_pgm_kernels.hip.cpp index ddc9dd90333..b6fb7890187 100644 --- a/hip/multigrid/amgx_pgm_kernels.hip.cpp +++ b/hip/multigrid/amgx_pgm_kernels.hip.cpp @@ -77,8 +77,8 @@ constexpr int default_block_size = 512; template void match_edge(std::shared_ptr exec, - const Array &strongest_neighbor, - Array &agg) + const Array& strongest_neighbor, + Array& agg) { const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); @@ -92,7 +92,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, IndexType *num_unagg) + const Array& agg, IndexType* num_unagg) { Array active_agg(exec, agg.get_num_elems()); const dim3 grid(ceildiv(active_agg.get_num_elems(), default_block_size)); @@ -108,8 +108,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template -void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) +void renumber(std::shared_ptr exec, Array& agg, + IndexType* num_agg) { const auto num = agg.get_num_elems(); Array agg_map(exec, num + 1); @@ -130,9 +130,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); template void find_strongest_neighbor( std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, Array& agg, + Array& strongest_neighbor) { const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, 
default_block_size)); @@ -150,10 +150,10 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void assign_to_exist_agg(std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, - Array &agg, - Array &intermediate_agg) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, + Array& agg, + Array& intermediate_agg) { const auto num = agg.get_num_elems(); const dim3 grid(ceildiv(num, default_block_size)); diff --git a/hip/preconditioner/isai_kernels.hip.cpp b/hip/preconditioner/isai_kernels.hip.cpp index 3a96cacfecb..5462ddd3efc 100644 --- a/hip/preconditioner/isai_kernels.hip.cpp +++ b/hip/preconditioner/isai_kernels.hip.cpp @@ -76,9 +76,9 @@ constexpr int default_block_size{subwarps_per_block * subwarp_size}; template void generate_tri_inverse(std::shared_ptr exec, - const matrix::Csr *input, - matrix::Csr *inverse, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* input, + matrix::Csr* inverse, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, bool lower) { const auto num_rows = input->get_size()[0]; @@ -114,10 +114,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_general_inverse(std::shared_ptr exec, - const matrix::Csr *input, - matrix::Csr *inverse, - IndexType *excess_rhs_ptrs, - IndexType *excess_nz_ptrs, bool spd) + const matrix::Csr* input, + matrix::Csr* inverse, + IndexType* excess_rhs_ptrs, + IndexType* excess_nz_ptrs, bool spd) { const auto num_rows = input->get_size()[0]; @@ -141,12 +141,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_excess_system(std::shared_ptr exec, - const matrix::Csr *input, - const matrix::Csr *inverse, - const IndexType *excess_rhs_ptrs, - const IndexType *excess_nz_ptrs, - matrix::Csr *excess_system, - matrix::Dense *excess_rhs, + const matrix::Csr* input, + const matrix::Csr* inverse, + const IndexType* excess_rhs_ptrs, + const IndexType* excess_nz_ptrs, + matrix::Csr* excess_system, 
+ matrix::Dense* excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -170,8 +170,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - matrix::Dense *excess_solution, + const IndexType* excess_block_ptrs, + matrix::Dense* excess_solution, size_type e_start, size_type e_end) { const dim3 block(default_block_size, 1, 1); @@ -188,9 +188,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scatter_excess_solution(std::shared_ptr exec, - const IndexType *excess_rhs_ptrs, - const matrix::Dense *excess_solution, - matrix::Csr *inverse, + const IndexType* excess_rhs_ptrs, + const matrix::Dense* excess_solution, + matrix::Csr* inverse, size_type e_start, size_type e_end) { const auto num_rows = inverse->get_size()[0]; diff --git a/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp b/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp index 6ac6717447a..a3b210a0fdd 100644 --- a/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp +++ b/hip/preconditioner/jacobi_advanced_apply_kernel.hip.cpp @@ -73,12 +73,12 @@ template void advanced_apply( syn::value_list, size_type num_blocks, - const precision_reduction *block_precisions, - const IndexType *block_pointers, const ValueType *blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const ValueType *alpha, const ValueType *b, size_type b_stride, - ValueType *x, size_type x_stride) + const precision_reduction* block_precisions, + const IndexType* block_pointers, const ValueType* blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const ValueType* alpha, const ValueType* b, size_type b_stride, + ValueType* x, size_type x_stride) { constexpr int subwarp_size = get_larger_power(max_block_size); constexpr int blocks_per_warp = config::warp_size / subwarp_size; @@ -114,14 +114,14 @@ 
GKO_ENABLE_IMPLEMENTATION_SELECTION(select_advanced_apply, advanced_apply); template void apply(std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, - const Array &blocks, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, + const Array& blocks, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* x) { // TODO: write a special kernel for multiple RHS dense::scale(exec, beta, x); diff --git a/hip/preconditioner/jacobi_generate_kernel.hip.cpp b/hip/preconditioner/jacobi_generate_kernel.hip.cpp index 6f0e4789f3e..4b398b4f75d 100644 --- a/hip/preconditioner/jacobi_generate_kernel.hip.cpp +++ b/hip/preconditioner/jacobi_generate_kernel.hip.cpp @@ -75,13 +75,13 @@ namespace { template void generate(syn::value_list, - const matrix::Csr *mtx, - remove_complex accuracy, ValueType *block_data, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - remove_complex *conditioning, - precision_reduction *block_precisions, - const IndexType *block_ptrs, size_type num_blocks) + const matrix::Csr* mtx, + remove_complex accuracy, ValueType* block_data, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + remove_complex* conditioning, + precision_reduction* block_precisions, + const IndexType* block_ptrs, size_type num_blocks) { constexpr int subwarp_size = get_larger_power(max_block_size); constexpr int blocks_per_warp = config::warp_size / subwarp_size; @@ -118,14 +118,14 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_generate, generate); template void generate(std::shared_ptr exec, - const matrix::Csr *system_matrix, + const matrix::Csr* 
system_matrix, size_type num_blocks, uint32 max_block_size, remove_complex accuracy, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array> &conditioning, - Array &block_precisions, - const Array &block_pointers, Array &blocks) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array>& conditioning, + Array& block_precisions, + const Array& block_pointers, Array& blocks) { components::fill_array(exec, blocks.get_data(), blocks.get_num_elems(), zero()); diff --git a/hip/preconditioner/jacobi_kernels.hip.cpp b/hip/preconditioner/jacobi_kernels.hip.cpp index 40e3aff69c5..6c42a3a41b5 100644 --- a/hip/preconditioner/jacobi_kernels.hip.cpp +++ b/hip/preconditioner/jacobi_kernels.hip.cpp @@ -78,9 +78,9 @@ constexpr int default_grid_size = 32 * 32 * 128; template size_type find_natural_blocks(std::shared_ptr exec, - const matrix::Csr *mtx, + const matrix::Csr* mtx, int32 max_block_size, - IndexType *__restrict__ block_ptrs) + IndexType* __restrict__ block_ptrs) { Array nums(exec, 1); @@ -105,7 +105,7 @@ size_type find_natural_blocks(std::shared_ptr exec, template inline size_type agglomerate_supervariables( std::shared_ptr exec, int32 max_block_size, - size_type num_natural_blocks, IndexType *block_ptrs) + size_type num_natural_blocks, IndexType* block_ptrs) { Array nums(exec, 1); @@ -122,8 +122,8 @@ inline size_type agglomerate_supervariables( void initialize_precisions(std::shared_ptr exec, - const Array &source, - Array &precisions) + const Array& source, + Array& precisions) { const auto block_size = default_num_warps * config::warp_size; const auto grid_size = min( @@ -138,9 +138,9 @@ void initialize_precisions(std::shared_ptr exec, template void find_blocks(std::shared_ptr exec, - const matrix::Csr *system_matrix, - uint32 max_block_size, size_type &num_blocks, - Array &block_pointers) + const matrix::Csr* system_matrix, + uint32 max_block_size, size_type& num_blocks, + Array& block_pointers) { auto 
num_natural_blocks = find_natural_blocks( exec, system_matrix, max_block_size, block_pointers.get_data()); @@ -159,11 +159,11 @@ template void transpose_jacobi( syn::value_list, size_type num_blocks, - const precision_reduction *block_precisions, - const IndexType *block_pointers, const ValueType *blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *out_blocks) + const precision_reduction* block_precisions, + const IndexType* block_pointers, const ValueType* blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* out_blocks) { constexpr int subwarp_size = get_larger_power(max_block_size); constexpr int blocks_per_warp = config::warp_size / subwarp_size; @@ -198,11 +198,11 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_transpose_jacobi, transpose_jacobi); template void transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { select_transpose_jacobi( compiled_kernels(), @@ -222,11 +222,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { select_transpose_jacobi( compiled_kernels(), @@ -246,11 
+246,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* result_values, size_type result_stride) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_JACOBI_CONVERT_TO_DENSE_KERNEL); diff --git a/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp b/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp index 0adbedc7473..c871f94043d 100644 --- a/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp +++ b/hip/preconditioner/jacobi_simple_apply_kernel.hip.cpp @@ -72,11 +72,11 @@ namespace { template void apply(syn::value_list, size_type num_blocks, - const precision_reduction *block_precisions, - const IndexType *block_pointers, const ValueType *blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const ValueType *b, size_type b_stride, ValueType *x, + const precision_reduction* block_precisions, + const IndexType* block_pointers, const ValueType* blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const ValueType* b, size_type b_stride, ValueType* x, size_type x_stride) { constexpr int subwarp_size = get_larger_power(max_block_size); @@ -112,11 +112,11 @@ template void simple_apply( std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const matrix::Dense *b, matrix::Dense *x) + const 
preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const matrix::Dense* b, matrix::Dense* x) { // TODO: write a special kernel for multiple RHS for (size_type col = 0; col < b->get_size()[1]; ++col) { diff --git a/hip/reorder/rcm_kernels.hip.cpp b/hip/reorder/rcm_kernels.hip.cpp index 2f5f8c32ef3..39460d0829f 100644 --- a/hip/reorder/rcm_kernels.hip.cpp +++ b/hip/reorder/rcm_kernels.hip.cpp @@ -60,8 +60,8 @@ namespace rcm { template void get_degree_of_nodes(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - IndexType *const degrees) GKO_NOT_IMPLEMENTED; + const IndexType* const row_ptrs, + IndexType* const degrees) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); @@ -69,9 +69,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); template void get_permutation( std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, const IndexType *const col_idxs, - const IndexType *const degrees, IndexType *const permutation, - IndexType *const inv_permutation, + const IndexType* const row_ptrs, const IndexType* const col_idxs, + const IndexType* const degrees, IndexType* const permutation, + IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_PERMUTATION_KERNEL); diff --git a/hip/solver/cb_gmres_kernels.hip.cpp b/hip/solver/cb_gmres_kernels.hip.cpp index 23bf0546cd6..be77f4d6570 100644 --- a/hip/solver/cb_gmres_kernels.hip.cpp +++ b/hip/solver/cb_gmres_kernels.hip.cpp @@ -82,7 +82,7 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; // Specialization, so the Accessor can use the same function as regular pointers template GKO_INLINE auto as_hip_accessor( - const acc::range> &acc) + const acc::range>& 
acc) { return acc::range< acc::reduced_row_major, hip_type>>( @@ -93,8 +93,8 @@ GKO_INLINE auto as_hip_accessor( template GKO_INLINE auto as_hip_accessor( - const acc::range> - &acc) + const acc::range>& + acc) { return acc::range, hip_type, mask>>( @@ -107,7 +107,7 @@ GKO_INLINE auto as_hip_accessor( template -void zero_matrix(size_type m, size_type n, size_type stride, ValueType *array) +void zero_matrix(size_type m, size_type n, size_type stride, ValueType* array) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(n, block_size.x), 1, 1); @@ -118,11 +118,11 @@ void zero_matrix(size_type m, size_type n, size_type stride, ValueType *array) template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), krylov_dim * b->get_size()[1]); @@ -145,13 +145,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense> *arnoldi_norm, + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense>* arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense *next_krylov_basis, - Array *final_iter_nums, size_type krylov_dim) + matrix::Dense* next_krylov_basis, + Array* final_iter_nums, size_type krylov_dim) { constexpr bool use_scalar = gko::cb_gmres::detail::has_3d_scaled_accessor::value; @@ -218,14 +218,14 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( template void finish_arnoldi_CGS(std::shared_ptr exec, - matrix::Dense 
*next_krylov_basis, + matrix::Dense* next_krylov_basis, Accessor3dim krylov_bases, - matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, const stopping_status *stop_status, - stopping_status *reorth_status, - Array *num_reorth) + matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, const stopping_status* stop_status, + stopping_status* reorth_status, + Array* num_reorth) { using non_complex = remove_complex; // optimization parameter @@ -401,12 +401,12 @@ void finish_arnoldi_CGS(std::shared_ptr exec, template void givens_rotation(std::shared_ptr exec, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - size_type iter, const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + size_type iter, const Array* stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -430,17 +430,17 @@ void givens_rotation(std::shared_ptr exec, template void step_1(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, Array *final_iter_nums, - const Array *stop_status, - Array *reorth_status, Array *num_reorth) + matrix::Dense* next_krylov_basis, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* 
arnoldi_norm, + size_type iter, Array* final_iter_nums, + const Array* stop_status, + Array* reorth_status, Array* num_reorth) { hipLaunchKernelGGL( increase_final_iteration_numbers_kernel, @@ -463,9 +463,9 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const Array* final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -487,9 +487,9 @@ void solve_upper_triangular( template void calculate_qy(ConstAccessor3d krylov_bases, size_type num_krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { const auto num_rows = before_preconditioner->get_size()[0]; const auto num_cols = before_preconditioner->get_size()[1]; @@ -517,12 +517,12 @@ void calculate_qy(ConstAccessor3d krylov_bases, size_type num_krylov_bases, template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, + const matrix::Dense* residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { // since hessenberg has dims: iters x iters * num_rhs // krylov_bases has dims: (iters + 1) x sysmtx[0] x num_rhs diff --git a/hip/solver/common_trs_kernels.hip.hpp b/hip/solver/common_trs_kernels.hip.hpp index 8698b23383f..0d0128715b3 100644 --- a/hip/solver/common_trs_kernels.hip.hpp +++ b/hip/solver/common_trs_kernels.hip.hpp 
@@ -72,7 +72,7 @@ struct SolveStruct : gko::solver::SolveStruct { hipsparseSolvePolicy_t policy; hipsparseMatDescr_t factor_descr; int factor_work_size; - void *factor_work_vec; + void* factor_work_vec; SolveStruct() { factor_work_vec = nullptr; @@ -87,13 +87,13 @@ struct SolveStruct : gko::solver::SolveStruct { policy = HIPSPARSE_SOLVE_POLICY_USE_LEVEL; } - SolveStruct(const SolveStruct &) = delete; + SolveStruct(const SolveStruct&) = delete; - SolveStruct(SolveStruct &&) = delete; + SolveStruct(SolveStruct&&) = delete; - SolveStruct &operator=(const SolveStruct &) = delete; + SolveStruct& operator=(const SolveStruct&) = delete; - SolveStruct &operator=(SolveStruct &&) = delete; + SolveStruct& operator=(SolveStruct&&) = delete; ~SolveStruct() { @@ -119,14 +119,14 @@ namespace { void should_perform_transpose_kernel(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { do_transpose = true; } void init_struct_kernel(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { solve_struct = std::make_shared(); } @@ -134,13 +134,13 @@ void init_struct_kernel(std::shared_ptr exec, template void generate_kernel(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs, bool is_upper) { if (hipsparse::is_supported::value) { if (auto hip_solve_struct = - dynamic_cast(solve_struct)) { + dynamic_cast(solve_struct)) { auto handle = exec->get_hipsparse_handle(); if (is_upper) { GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseSetMatFillMode( @@ -162,7 +162,7 @@ void generate_kernel(std::shared_ptr exec, exec->free(hip_solve_struct->factor_work_vec); } hip_solve_struct->factor_work_vec = - exec->alloc(hip_solve_struct->factor_work_size); + exec->alloc(hip_solve_struct->factor_work_size); hipsparse::csrsv2_analysis( handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, @@ -183,18 +183,18 @@ void generate_kernel(std::shared_ptr exec, 
template void solve_kernel(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, - matrix::Dense *trans_x, - const matrix::Dense *b, - matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, + matrix::Dense* trans_x, + const matrix::Dense* b, + matrix::Dense* x) { using vec = matrix::Dense; if (hipsparse::is_supported::value) { if (auto hip_solve_struct = - dynamic_cast(solve_struct)) { + dynamic_cast(solve_struct)) { ValueType one = 1.0; auto handle = exec->get_hipsparse_handle(); diff --git a/hip/solver/gmres_kernels.hip.cpp b/hip/solver/gmres_kernels.hip.cpp index 38c6477e516..31eb7b4acd8 100644 --- a/hip/solver/gmres_kernels.hip.cpp +++ b/hip/solver/gmres_kernels.hip.cpp @@ -81,11 +81,11 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { const auto num_threads = std::max(b->get_size()[0] * b->get_stride(), krylov_dim * b->get_size()[1]); @@ -108,11 +108,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - Array *final_iter_nums, size_type krylov_dim) + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + Array* final_iter_nums, size_type krylov_dim) { const auto num_rows = residual->get_size()[0]; const auto num_rhs = residual->get_size()[1]; @@ -142,9 +142,9 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL); template void finish_arnoldi(std::shared_ptr exec, size_type num_rows, - matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) + matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { const auto stride_krylov = krylov_bases->get_stride(); const auto stride_hessenberg = hessenberg_iter->get_stride(); @@ -215,12 +215,12 @@ void finish_arnoldi(std::shared_ptr exec, size_type num_rows, template void givens_rotation(std::shared_ptr exec, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - size_type iter, const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + size_type iter, const Array* stop_status) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -245,14 +245,14 @@ void givens_rotation(std::shared_ptr exec, template void step_1(std::shared_ptr exec, size_type num_rows, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - Array *final_iter_nums, - const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + Array* final_iter_nums, + const Array* stop_status) { hipLaunchKernelGGL( increase_final_iteration_numbers_kernel, @@ -273,9 +273,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_1_KERNEL); template void 
solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const Array* final_iter_nums) { // TODO: tune block_size for optimal performance constexpr auto block_size = default_block_size; @@ -296,10 +296,10 @@ void solve_upper_triangular( template -void calculate_qy(const matrix::Dense *krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) +void calculate_qy(const matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { const auto num_rows = before_preconditioner->get_size()[0]; const auto num_cols = krylov_bases->get_size()[1]; @@ -330,12 +330,12 @@ void calculate_qy(const matrix::Dense *krylov_bases, template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* krylov_bases, + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, final_iter_nums); diff --git a/hip/solver/idr_kernels.hip.cpp b/hip/solver/idr_kernels.hip.cpp index 1e9cb66aa2b..cc61e41fb2a 100644 --- a/hip/solver/idr_kernels.hip.cpp +++ b/hip/solver/idr_kernels.hip.cpp @@ -78,8 +78,8 @@ namespace { template -void initialize_m(const size_type nrhs, matrix::Dense *m, - Array *stop_status) +void initialize_m(const size_type nrhs, matrix::Dense* m, + Array* stop_status) { const auto subspace_dim = m->get_size()[0]; const auto m_stride = m->get_stride(); @@ -93,7 +93,7 @@ void 
initialize_m(const size_type nrhs, matrix::Dense *m, template -void initialize_subspace_vectors(matrix::Dense *subspace_vectors, +void initialize_subspace_vectors(matrix::Dense* subspace_vectors, bool deterministic) { if (deterministic) { @@ -113,7 +113,7 @@ void initialize_subspace_vectors(matrix::Dense *subspace_vectors, template -void orthonormalize_subspace_vectors(matrix::Dense *subspace_vectors) +void orthonormalize_subspace_vectors(matrix::Dense* subspace_vectors) { hipLaunchKernelGGL( HIP_KERNEL_NAME( @@ -127,10 +127,10 @@ void orthonormalize_subspace_vectors(matrix::Dense *subspace_vectors) template void solve_lower_triangular(const size_type nrhs, - const matrix::Dense *m, - const matrix::Dense *f, - matrix::Dense *c, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* f, + matrix::Dense* c, + const Array* stop_status) { const auto subspace_dim = m->get_size()[0]; @@ -147,12 +147,12 @@ void solve_lower_triangular(const size_type nrhs, template void update_g_and_u(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense *p, - const matrix::Dense *m, - matrix::Dense *alpha, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, - const Array *stop_status) + const matrix::Dense* p, + const matrix::Dense* m, + matrix::Dense* alpha, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, + const Array* stop_status) { const auto size = g->get_size()[0]; const auto p_stride = p->get_stride(); @@ -198,9 +198,9 @@ void update_g_and_u(std::shared_ptr exec, template void update_m(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - const matrix::Dense *g_k, matrix::Dense *m, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + const matrix::Dense* g_k, matrix::Dense* m, + const Array* stop_status) { const auto size = g_k->get_size()[0]; const auto subspace_dim = m->get_size()[0]; @@ -232,12 +232,12 @@ void update_m(std::shared_ptr exec, const 
size_type nrhs, template void update_x_r_and_f(std::shared_ptr exec, const size_type nrhs, const size_type k, - const matrix::Dense *m, - const matrix::Dense *g, - const matrix::Dense *u, - matrix::Dense *f, matrix::Dense *r, - matrix::Dense *x, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* g, + const matrix::Dense* u, + matrix::Dense* f, matrix::Dense* r, + matrix::Dense* x, + const Array* stop_status) { const auto size = x->get_size()[0]; const auto subspace_dim = m->get_size()[0]; @@ -262,9 +262,9 @@ void update_x_r_and_f(std::shared_ptr exec, template void initialize(std::shared_ptr exec, const size_type nrhs, - matrix::Dense *m, - matrix::Dense *subspace_vectors, bool deterministic, - Array *stop_status) + matrix::Dense* m, + matrix::Dense* subspace_vectors, bool deterministic, + Array* stop_status) { initialize_m(nrhs, m, stop_status); initialize_subspace_vectors(subspace_vectors, deterministic); @@ -276,12 +276,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *m, - const matrix::Dense *f, - const matrix::Dense *residual, - const matrix::Dense *g, matrix::Dense *c, - matrix::Dense *v, - const Array *stop_status) + const size_type k, const matrix::Dense* m, + const matrix::Dense* f, + const matrix::Dense* residual, + const matrix::Dense* g, matrix::Dense* c, + matrix::Dense* v, + const Array* stop_status) { solve_lower_triangular(nrhs, m, f, c, stop_status); @@ -304,10 +304,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *omega, - const matrix::Dense *preconditioned_vector, - const matrix::Dense *c, matrix::Dense *u, - const Array *stop_status) + const size_type k, const matrix::Dense* omega, + const matrix::Dense* preconditioned_vector, + const matrix::Dense* c, 
matrix::Dense* u, + const Array* stop_status) { const auto num_rows = preconditioned_vector->get_size()[0]; const auto subspace_dim = u->get_size()[1] / nrhs; @@ -328,12 +328,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, matrix::Dense *m, - matrix::Dense *f, matrix::Dense *alpha, - matrix::Dense *residual, matrix::Dense *x, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, matrix::Dense* m, + matrix::Dense* f, matrix::Dense* alpha, + matrix::Dense* residual, matrix::Dense* x, + const Array* stop_status) { update_g_and_u(exec, nrhs, k, p, m, alpha, g, g_k, u, stop_status); update_m(exec, nrhs, k, p, g_k, m, stop_status); @@ -346,9 +346,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense *tht, - const matrix::Dense> *residual_norm, - matrix::Dense *omega, const Array *stop_status) + const remove_complex kappa, const matrix::Dense* tht, + const matrix::Dense>* residual_norm, + matrix::Dense* omega, const Array* stop_status) { const auto grid_dim = ceildiv(nrhs, config::warp_size); hipLaunchKernelGGL(HIP_KERNEL_NAME(compute_omega_kernel), grid_dim, diff --git a/hip/solver/lower_trs_kernels.hip.cpp b/hip/solver/lower_trs_kernels.hip.cpp index 3eeb50185ac..0f7751a039d 100644 --- a/hip/solver/lower_trs_kernels.hip.cpp +++ b/hip/solver/lower_trs_kernels.hip.cpp @@ -63,14 +63,14 @@ namespace lower_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { should_perform_transpose_kernel(exec, do_transpose); } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) 
{ init_struct_kernel(exec, solve_struct); } @@ -78,8 +78,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { generate_kernel(exec, matrix, solve_struct, num_rhs, false); @@ -91,10 +91,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { solve_kernel(exec, matrix, solve_struct, trans_b, trans_x, b, x); diff --git a/hip/solver/upper_trs_kernels.hip.cpp b/hip/solver/upper_trs_kernels.hip.cpp index 835e2f3803c..0dd3d788d67 100644 --- a/hip/solver/upper_trs_kernels.hip.cpp +++ b/hip/solver/upper_trs_kernels.hip.cpp @@ -63,14 +63,14 @@ namespace upper_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { should_perform_transpose_kernel(exec, do_transpose); } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { init_struct_kernel(exec, solve_struct); } @@ -78,8 +78,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { generate_kernel(exec, matrix, solve_struct, num_rhs, true); @@ -91,10 +91,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct 
*solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { solve_kernel(exec, matrix, solve_struct, trans_b, trans_x, b, x); diff --git a/hip/stop/criterion_kernels.hip.cpp b/hip/stop/criterion_kernels.hip.cpp index 0443930660e..dbaa3782228 100644 --- a/hip/stop/criterion_kernels.hip.cpp +++ b/hip/stop/criterion_kernels.hip.cpp @@ -59,7 +59,7 @@ constexpr int default_block_size = 512; __global__ __launch_bounds__(default_block_size) void set_all_statuses( size_type num_elems, uint8 stoppingId, bool setFinalized, - stopping_status *stop_status) + stopping_status* stop_status) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_elems) { @@ -69,7 +69,7 @@ __global__ __launch_bounds__(default_block_size) void set_all_statuses( void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, - bool setFinalized, Array *stop_status) + bool setFinalized, Array* stop_status) { const dim3 block_size(default_block_size, 1, 1); const dim3 grid_size(ceildiv(stop_status->get_num_elems(), block_size.x), 1, diff --git a/hip/stop/residual_norm_kernels.hip.cpp b/hip/stop/residual_norm_kernels.hip.cpp index e4b3ddf3b95..7d3d3435e37 100644 --- a/hip/stop/residual_norm_kernels.hip.cpp +++ b/hip/stop/residual_norm_kernels.hip.cpp @@ -63,10 +63,10 @@ constexpr int default_block_size = 512; template __global__ __launch_bounds__(default_block_size) void residual_norm_kernel( size_type num_cols, ValueType rel_residual_goal, - const ValueType *__restrict__ tau, const ValueType *__restrict__ orig_tau, + const ValueType* __restrict__ tau, const ValueType* __restrict__ orig_tau, uint8 stoppingId, bool setFinalized, - stopping_status *__restrict__ stop_status, - bool *__restrict__ device_storage) + stopping_status* __restrict__ stop_status, + bool* __restrict__ 
device_storage) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_cols) { @@ -84,7 +84,7 @@ __global__ __launch_bounds__(default_block_size) void residual_norm_kernel( __global__ __launch_bounds__(1) void init_kernel( - bool *__restrict__ device_storage) + bool* __restrict__ device_storage) { device_storage[0] = true; device_storage[1] = false; @@ -93,12 +93,12 @@ __global__ __launch_bounds__(1) void init_kernel( template void residual_norm(std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense *orig_tau, + const matrix::Dense* tau, + const matrix::Dense* orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, - bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, + bool* one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); @@ -142,11 +142,11 @@ template __global__ __launch_bounds__(default_block_size) void implicit_residual_norm_kernel( size_type num_cols, remove_complex rel_residual_goal, - const ValueType *__restrict__ tau, - const remove_complex *__restrict__ orig_tau, + const ValueType* __restrict__ tau, + const remove_complex* __restrict__ orig_tau, uint8 stoppingId, bool setFinalized, - stopping_status *__restrict__ stop_status, - bool *__restrict__ device_storage) + stopping_status* __restrict__ stop_status, + bool* __restrict__ device_storage) { const auto tidx = thread::get_thread_id_flat(); if (tidx < num_cols) { @@ -164,7 +164,7 @@ __global__ __global__ __launch_bounds__(1) void init_kernel( - bool *__restrict__ device_storage) + bool* __restrict__ device_storage) { device_storage[0] = true; device_storage[1] = false; @@ -174,11 +174,11 @@ __global__ __launch_bounds__(1) void init_kernel( template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense> *orig_tau, + const 
matrix::Dense* tau, + const matrix::Dense>* orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, bool* one_changed) { hipLaunchKernelGGL((init_kernel), dim3(1), dim3(1), 0, 0, as_hip_type(device_storage->get_data())); diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index c789004ddb8..a26f26687c3 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -58,7 +58,7 @@ namespace { class ExampleOperation : public gko::Operation { public: - explicit ExampleOperation(int &val) : value(val) {} + explicit ExampleOperation(int& val) : value(val) {} void run(std::shared_ptr) const override { @@ -85,7 +85,7 @@ class ExampleOperation : public gko::Operation { hipGetDevice(&value); } - int &value; + int& value; }; @@ -145,7 +145,7 @@ TEST_F(HipExecutor, MasterKnowsNumberOfDevices) TEST_F(HipExecutor, AllocatesAndFreesMemory) { - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_NO_THROW(ptr = hip->alloc(2)); ASSERT_NO_THROW(hip->free(ptr)); @@ -155,7 +155,7 @@ TEST_F(HipExecutor, AllocatesAndFreesMemory) TEST_F(HipExecutor, FailsWhenOverallocating) { const gko::size_type num_elems = 1ll << 50; // 4PB of integers - int *ptr = nullptr; + int* ptr = nullptr; ASSERT_THROW( { @@ -168,7 +168,7 @@ TEST_F(HipExecutor, FailsWhenOverallocating) } -__global__ void check_data(int *data) +__global__ void check_data(int* data) { if (data[0] != 3 || data[1] != 8) { #if GINKGO_HIP_PLATFORM_HCC @@ -182,7 +182,7 @@ __global__ void check_data(int *data) TEST_F(HipExecutor, CopiesDataToHip) { int orig[] = {3, 8}; - auto *copy = hip->alloc(2); + auto* copy = hip->alloc(2); hip->copy_from(omp.get(), 2, orig, copy); @@ -192,7 +192,7 @@ TEST_F(HipExecutor, CopiesDataToHip) } -__global__ void check_data2(int *data) +__global__ void 
check_data2(int* data) { if (data[0] != 4 || data[1] != 8) { #if GINKGO_HIP_PLATFORM_HCC @@ -210,7 +210,7 @@ __global__ void check_data2(int *data) TEST_F(HipExecutor, CanAllocateOnUnifiedMemory) { int orig[] = {3, 8}; - auto *copy = hip3->alloc(2); + auto* copy = hip3->alloc(2); hip3->copy_from(omp.get(), 2, orig, copy); @@ -225,7 +225,7 @@ TEST_F(HipExecutor, CanAllocateOnUnifiedMemory) #endif -__global__ void init_data(int *data) +__global__ void init_data(int* data) { data[0] = 3; data[1] = 8; diff --git a/hip/test/base/kernel_launch.hip.cpp b/hip/test/base/kernel_launch.hip.cpp index 261c552056f..55ddb3fd01e 100644 --- a/hip/test/base/kernel_launch.hip.cpp +++ b/hip/test/base/kernel_launch.hip.cpp @@ -98,13 +98,13 @@ class KernelLaunch : public ::testing::Test { // nvcc doesn't like device lambdas declared in complex classes, move it out -void run1d(std::shared_ptr exec, size_type dim, int *data) +void run1d(std::shared_ptr exec, size_type dim, int* data) { gko::kernels::hip::run_kernel( exec, [] GKO_KERNEL(auto i, auto d) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i] = i; }, dim, data); @@ -118,14 +118,14 @@ TEST_F(KernelLaunch, Runs1D) } -void run1d(std::shared_ptr exec, gko::Array &data) +void run1d(std::shared_ptr exec, gko::Array& data) { gko::kernels::hip::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i] = i; } else { @@ -143,16 +143,16 @@ TEST_F(KernelLaunch, Runs1DArray) } -void run1d(std::shared_ptr exec, gko::matrix::Dense<> *m) +void run1d(std::shared_ptr exec, gko::matrix::Dense<>* m) { gko::kernels::hip::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { static_assert(is_same::value, "index"); - 
static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; @@ -167,7 +167,7 @@ void run1d(std::shared_ptr exec, gko::matrix::Dense<> *m) d(i / 4, i % 4) = 0; } }, - 16, m, static_cast *>(m), + 16, m, static_cast*>(m), m->get_const_values()); } @@ -179,14 +179,14 @@ TEST_F(KernelLaunch, Runs1DDense) } -void run2d(std::shared_ptr exec, int *data) +void run2d(std::shared_ptr exec, int* data) { gko::kernels::hip::run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto d) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i + 4 * j] = 4 * i + j; }, dim<2>{4, 4}, data); @@ -200,15 +200,15 @@ TEST_F(KernelLaunch, Runs2D) } -void run2d(std::shared_ptr exec, gko::Array &data) +void run2d(std::shared_ptr exec, gko::Array& data) { gko::kernels::hip::run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i + 4 * j] = 4 * i + j; } else { @@ -226,23 +226,23 @@ TEST_F(KernelLaunch, Runs2DArray) } -void run2d(std::shared_ptr exec, gko::matrix::Dense<> *m1, - gko::matrix::Dense<> *m2, gko::matrix::Dense<> *m3) +void run2d(std::shared_ptr exec, gko::matrix::Dense<>* m1, + gko::matrix::Dense<>* m2, gko::matrix::Dense<>* m3) { gko::kernels::hip::run_kernel_solver( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, auto d4, auto d2_ptr, auto d3_ptr) { static_assert(is_same::value, "index"); - 
static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && d3.data == d2_ptr && d4 == d3_ptr; bool strides_correct = @@ -261,7 +261,7 @@ void run2d(std::shared_ptr exec, gko::matrix::Dense<> *m1, } }, dim<2>{4, 4}, m2->get_stride(), m1, - static_cast *>(m1), m1->get_const_values(), + static_cast*>(m1), m1->get_const_values(), gko::kernels::hip::default_stride(m2), gko::kernels::hip::row_vector(m3), m2->get_values(), m3->get_values()); } diff --git a/hip/test/base/math.hip.cpp b/hip/test/base/math.hip.cpp index 95d626225de..72ca63d8b00 100644 --- a/hip/test/base/math.hip.cpp +++ b/hip/test/base/math.hip.cpp @@ -101,7 +101,7 @@ __device__ bool test_complex_is_finite_function(FuncType isfin) template -__global__ void test_real_is_finite(bool *result) +__global__ void test_real_is_finite(bool* result) { *result = kernel::test_real_is_finite_function( [](T val) { return gko::is_finite(val); }); @@ -109,7 +109,7 @@ __global__ void test_real_is_finite(bool *result) template -__global__ void test_complex_is_finite(bool *result) +__global__ void test_complex_is_finite(bool* result) { *result = kernel::test_complex_is_finite_function( [](ComplexType val) { return gko::is_finite(val); }); diff --git a/hip/test/components/cooperative_groups_kernels.hip.cpp b/hip/test/components/cooperative_groups_kernels.hip.cpp index ec1ce79e080..1b6a40257a1 100644 --- a/hip/test/components/cooperative_groups_kernels.hip.cpp +++ 
b/hip/test/components/cooperative_groups_kernels.hip.cpp @@ -105,7 +105,7 @@ class CooperativeGroups : public ::testing::Test { constexpr static int subwarp_size = config::warp_size / 4; -__device__ void test_assert(bool *success, bool partial) +__device__ void test_assert(bool* success, bool partial) { if (!partial) { *success = false; @@ -113,7 +113,7 @@ __device__ void test_assert(bool *success, bool partial) } -__global__ void cg_shuffle(bool *s) +__global__ void cg_shuffle(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -127,7 +127,7 @@ __global__ void cg_shuffle(bool *s) TEST_F(CooperativeGroups, Shuffle) { test(cg_shuffle); } -__global__ void cg_all(bool *s) +__global__ void cg_all(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -140,7 +140,7 @@ __global__ void cg_all(bool *s) TEST_F(CooperativeGroups, All) { test(cg_all); } -__global__ void cg_any(bool *s) +__global__ void cg_any(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -153,7 +153,7 @@ __global__ void cg_any(bool *s) TEST_F(CooperativeGroups, Any) { test(cg_any); } -__global__ void cg_ballot(bool *s) +__global__ void cg_ballot(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -166,7 +166,7 @@ __global__ void cg_ballot(bool *s) TEST_F(CooperativeGroups, Ballot) { test(cg_ballot); } -__global__ void cg_subwarp_shuffle(bool *s) +__global__ void cg_subwarp_shuffle(bool* s) { auto group = group::tiled_partition(group::this_thread_block()); @@ -193,7 +193,7 @@ TEST_F(CooperativeGroups, SubwarpShuffle) { test(cg_subwarp_shuffle); } TEST_F(CooperativeGroups, SubwarpShuffle2) { test_subwarp(cg_subwarp_shuffle); } -__global__ void cg_subwarp_all(bool *s) +__global__ void cg_subwarp_all(bool* s) { auto grp = threadIdx.x / subwarp_size; bool test_grp = grp == 1; @@ -222,7 +222,7 @@ TEST_F(CooperativeGroups, SubwarpAll) { test(cg_subwarp_all); } TEST_F(CooperativeGroups, SubwarpAll2) { 
test_subwarp(cg_subwarp_all); } -__global__ void cg_subwarp_any(bool *s) +__global__ void cg_subwarp_any(bool* s) { auto grp = threadIdx.x / subwarp_size; bool test_grp = grp == 1; @@ -251,7 +251,7 @@ TEST_F(CooperativeGroups, SubwarpAny) { test(cg_subwarp_any); } TEST_F(CooperativeGroups, SubwarpAny2) { test_subwarp(cg_subwarp_any); } -__global__ void cg_subwarp_ballot(bool *s) +__global__ void cg_subwarp_ballot(bool* s) { auto grp = threadIdx.x / subwarp_size; bool test_grp = grp == 1; @@ -282,7 +282,7 @@ TEST_F(CooperativeGroups, SubwarpBallot2) { test_subwarp(cg_subwarp_ballot); } template -__global__ void cg_shuffle_sum(const int num, ValueType *__restrict__ value) +__global__ void cg_shuffle_sum(const int num, ValueType* __restrict__ value) { auto group = group::tiled_partition(group::this_thread_block()); diff --git a/hip/test/components/merging_kernels.hip.cpp b/hip/test/components/merging_kernels.hip.cpp index d5acbf2b8c4..a02de0f6602 100644 --- a/hip/test/components/merging_kernels.hip.cpp +++ b/hip/test/components/merging_kernels.hip.cpp @@ -155,8 +155,8 @@ class Merging : public ::testing::Test { }; -__global__ void test_merge_step(const gko::int32 *a, const gko::int32 *b, - gko::int32 *c) +__global__ void test_merge_step(const gko::int32* a, const gko::int32* b, + gko::int32* c) { auto warp = tiled_partition(this_thread_block()); auto i = warp.thread_rank(); @@ -178,8 +178,8 @@ TEST_F(Merging, MergeStep) } -__global__ void test_merge(const gko::int32 *a, const gko::int32 *b, int size, - gko::int32 *c) +__global__ void test_merge(const gko::int32* a, const gko::int32* b, int size, + gko::int32* c) { auto warp = tiled_partition(this_thread_block()); group_merge(a, size, b, size, warp, @@ -208,8 +208,8 @@ TEST_F(Merging, FullMerge) } -__global__ void test_sequential_merge(const gko::int32 *a, const gko::int32 *b, - int size, gko::int32 *c) +__global__ void test_sequential_merge(const gko::int32* a, const gko::int32* b, + int size, gko::int32* c) { 
sequential_merge( a, size, b, size, @@ -235,11 +235,11 @@ TEST_F(Merging, SequentialFullMerge) } -__global__ void test_merge_idxs(const gko::int32 *a, const gko::int32 *b, - int size, gko::int32 *c, gko::int32 *aidxs, - gko::int32 *bidxs, gko::int32 *cidxs, - gko::int32 *refaidxs, gko::int32 *refbidxs, - gko::int32 *refcidxs) +__global__ void test_merge_idxs(const gko::int32* a, const gko::int32* b, + int size, gko::int32* c, gko::int32* aidxs, + gko::int32* bidxs, gko::int32* cidxs, + gko::int32* refaidxs, gko::int32* refbidxs, + gko::int32* refcidxs) { if (threadIdx.x == 0) { sequential_merge(a, size, b, size, diff --git a/hip/test/components/searching_kernels.hip.cpp b/hip/test/components/searching_kernels.hip.cpp index d22c4a125e0..cacb480abba 100644 --- a/hip/test/components/searching_kernels.hip.cpp +++ b/hip/test/components/searching_kernels.hip.cpp @@ -96,7 +96,7 @@ class Searching : public ::testing::Test { }; -__device__ void test_assert(bool *success, bool predicate) +__device__ void test_assert(bool* success, bool predicate) { if (!predicate) { *success = false; @@ -104,7 +104,7 @@ __device__ void test_assert(bool *success, bool predicate) } -__global__ void test_binary_search(bool *success, int offset, int size) +__global__ void test_binary_search(bool* success, int offset, int size) { // test binary search on [offset, offset + size) // for all possible partition points @@ -133,7 +133,7 @@ TEST_F(Searching, BinaryOffset) } -__global__ void test_empty_binary_search(bool *success, int offset, int) +__global__ void test_empty_binary_search(bool* success, int offset, int) { auto result = binary_search(offset, 0, [&](int i) { // don't access out-of-bounds! 
@@ -154,7 +154,7 @@ TEST_F(Searching, BinaryEmptyOffset) } -__global__ void test_sync_binary_search(bool *success, int, int size) +__global__ void test_sync_binary_search(bool* success, int, int size) { // test binary search on [0, size) // for all possible partition points @@ -178,7 +178,7 @@ TEST_F(Searching, SyncBinary) } -__global__ void test_empty_sync_binary_search(bool *success, int, int) +__global__ void test_empty_sync_binary_search(bool* success, int, int) { auto result = synchronous_binary_search(0, [&](int i) { // don't access out-of-bounds! @@ -194,7 +194,7 @@ TEST_F(Searching, EmptySyncBinary) } -__global__ void test_warp_ary_search(bool *success, int offset, int size) +__global__ void test_warp_ary_search(bool* success, int offset, int size) { // test binary search on [offset, offset + size) // for all possible partition points @@ -222,7 +222,7 @@ TEST_F(Searching, WarpAryOffset) } -__global__ void test_warp_wide_search(bool *success, int offset, int size) +__global__ void test_warp_wide_search(bool* success, int offset, int size) { // test binary search on [offset, offset + size) // for all possible partition points diff --git a/hip/test/components/sorting_kernels.hip.cpp b/hip/test/components/sorting_kernels.hip.cpp index ed65b1ea798..8d8b7f0d478 100644 --- a/hip/test/components/sorting_kernels.hip.cpp +++ b/hip/test/components/sorting_kernels.hip.cpp @@ -59,7 +59,7 @@ constexpr int num_local = 4; constexpr auto num_threads = num_elements / num_local; -__global__ void test_sort_shared(gko::int32 *data) +__global__ void test_sort_shared(gko::int32* data) { gko::int32 local[num_local]; __shared__ gko::int32 sh_local[num_elements]; @@ -73,14 +73,14 @@ __global__ void test_sort_shared(gko::int32 *data) } -__global__ void test_sort_warp(gko::int32 *data) +__global__ void test_sort_warp(gko::int32* data) { gko::int32 local[num_local]; for (int i = 0; i < num_local; ++i) { local[i] = data[threadIdx.x * num_local + i]; } bitonic_sort( - local, 
static_cast(nullptr)); + local, static_cast(nullptr)); for (int i = 0; i < num_local; ++i) { data[threadIdx.x * num_local + i] = local[i]; } diff --git a/hip/test/factorization/par_ilu_kernels.hip.cpp b/hip/test/factorization/par_ilu_kernels.hip.cpp index 552624012e7..31441f01707 100644 --- a/hip/test/factorization/par_ilu_kernels.hip.cpp +++ b/hip/test/factorization/par_ilu_kernels.hip.cpp @@ -134,10 +134,10 @@ class ParIlu : public ::testing::Test { return mtx; } - void initialize_row_ptrs(index_type *l_row_ptrs_ref, - index_type *u_row_ptrs_ref, - index_type *l_row_ptrs_hip, - index_type *u_row_ptrs_hip) + void initialize_row_ptrs(index_type* l_row_ptrs_ref, + index_type* u_row_ptrs_ref, + index_type* l_row_ptrs_hip, + index_type* u_row_ptrs_hip) { gko::kernels::reference::factorization::initialize_row_ptrs_l_u( ref, gko::lend(csr_ref), l_row_ptrs_ref, u_row_ptrs_ref); @@ -145,8 +145,8 @@ class ParIlu : public ::testing::Test { hip, gko::lend(csr_hip), l_row_ptrs_hip, u_row_ptrs_hip); } - void initialize_lu(std::unique_ptr *l_ref, std::unique_ptr *u_ref, - std::unique_ptr *l_hip, std::unique_ptr *u_hip) + void initialize_lu(std::unique_ptr* l_ref, std::unique_ptr* u_ref, + std::unique_ptr* l_hip, std::unique_ptr* u_hip) { auto num_row_ptrs = csr_ref->get_size()[0] + 1; gko::Array l_row_ptrs_ref{ref, num_row_ptrs}; @@ -184,13 +184,13 @@ class ParIlu : public ::testing::Test { template static std::unique_ptr static_unique_ptr_cast( - std::unique_ptr &&from) + std::unique_ptr&& from) { - return std::unique_ptr{static_cast(from.release())}; + return std::unique_ptr{static_cast(from.release())}; } - void compute_lu(std::unique_ptr *l_ref, std::unique_ptr *u_ref, - std::unique_ptr *l_hip, std::unique_ptr *u_hip, + void compute_lu(std::unique_ptr* l_ref, std::unique_ptr* u_ref, + std::unique_ptr* l_hip, std::unique_ptr* u_hip, gko::size_type iterations = 0) { auto coo_ref = Coo::create(ref); diff --git a/hip/test/factorization/par_ilut_kernels.hip.cpp 
b/hip/test/factorization/par_ilut_kernels.hip.cpp index 7b68b024a76..8782654d754 100644 --- a/hip/test/factorization/par_ilut_kernels.hip.cpp +++ b/hip/test/factorization/par_ilut_kernels.hip.cpp @@ -174,8 +174,8 @@ class ParIlut : public ::testing::Test { } template - void test_select(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, index_type rank, + void test_select(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, index_type rank, value_type tolerance = 0.0) { auto size = index_type(mtx->get_num_stored_elements()); @@ -199,8 +199,8 @@ class ParIlut : public ::testing::Test { template > - void test_filter(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, value_type threshold, + void test_filter(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, value_type threshold, bool lower) { auto res = Mtx::create(ref, mtx_size); @@ -228,8 +228,8 @@ class ParIlut : public ::testing::Test { template > - void test_filter_approx(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, index_type rank, + void test_filter_approx(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, index_type rank, value_type tolerance = 0.0) { auto res = Mtx::create(ref, mtx_size); @@ -335,7 +335,7 @@ TEST_F(ParIlut, KernelThresholdFilterNullptrCooIsEquivalentToRef) { auto res = Csr::create(ref, mtx_size); auto dres = Csr::create(hip, mtx_size); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::kernels::reference::par_ilut_factorization::threshold_filter( ref, mtx_l.get(), 0.5, res.get(), null_coo, true); @@ -428,7 +428,7 @@ TEST_F(ParIlut, KernelThresholdFilterApproxNullptrCooIsEquivalentToRef) test_filter(mtx_l, dmtx_l, 0.5, true); auto res = Csr::create(ref, mtx_size); auto dres = Csr::create(hip, mtx_size); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::Array tmp(ref); gko::Array dtmp(hip); gko::remove_complex threshold{}; diff --git a/hip/test/matrix/dense_kernels.hip.cpp b/hip/test/matrix/dense_kernels.hip.cpp index 
cb6e84ac5e5..0739490c542 100644 --- a/hip/test/matrix/dense_kernels.hip.cpp +++ b/hip/test/matrix/dense_kernels.hip.cpp @@ -135,7 +135,7 @@ class Dense : public ::testing::Test { std::shuffle(tmp2.begin(), tmp2.end(), rng); std::vector tmp3(x->get_size()[0] / 10); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); - for (auto &i : tmp3) { + for (auto& i : tmp3) { i = row_dist(rng); } rpermute_idxs = @@ -147,7 +147,7 @@ class Dense : public ::testing::Test { } template - std::unique_ptr convert(InputType &&input) + std::unique_ptr convert(InputType&& input) { auto result = ConvertedType::create(input->get_executor()); input->convert_to(result.get()); @@ -567,8 +567,8 @@ TEST_F(Dense, IsTransposable) auto trans = x->transpose(); auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } diff --git a/hip/test/matrix/diagonal_kernels.hip.cpp b/hip/test/matrix/diagonal_kernels.hip.cpp index c3a0eec00bc..a3e3dac1bd3 100644 --- a/hip/test/matrix/diagonal_kernels.hip.cpp +++ b/hip/test/matrix/diagonal_kernels.hip.cpp @@ -238,9 +238,9 @@ TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) set_up_complex_data(); auto trans = cdiag->conj_transpose(); - auto trans_diag = static_cast(trans.get()); + auto trans_diag = static_cast(trans.get()); auto dtrans = dcdiag->conj_transpose(); - auto dtrans_diag = static_cast(dtrans.get()); + auto dtrans_diag = static_cast(dtrans.get()); GKO_ASSERT_MTX_NEAR(trans_diag, dtrans_diag, 0); } diff --git a/hip/test/multigrid/amgx_pgm_kernels.cpp b/hip/test/multigrid/amgx_pgm_kernels.cpp index 55c8cada515..e6f16724d2f 100644 --- a/hip/test/multigrid/amgx_pgm_kernels.cpp +++ b/hip/test/multigrid/amgx_pgm_kernels.cpp @@ -153,7 +153,7 @@ class AmgxPgm : public ::testing::Test { d_system_mtx = gko::clone(hip, system_mtx); } - void make_weight(Mtx *mtx) + void make_weight(Mtx* mtx) { 
gko::test::make_symmetric(mtx); // only works for real value cases diff --git a/hip/test/preconditioner/isai_kernels.hip.cpp b/hip/test/preconditioner/isai_kernels.hip.cpp index bf050fb73ff..6ad722abc4e 100644 --- a/hip/test/preconditioner/isai_kernels.hip.cpp +++ b/hip/test/preconditioner/isai_kernels.hip.cpp @@ -71,7 +71,7 @@ class Isai : public ::testing::Test { hip = gko::HipExecutor::create(0, ref); } - std::unique_ptr clone_allocations(const Csr *csr_mtx) + std::unique_ptr clone_allocations(const Csr* csr_mtx) { if (csr_mtx->get_executor() != ref) { return {nullptr}; @@ -117,7 +117,7 @@ class Isai : public ::testing::Test { d_inverse = gko::clone(hip, inverse); } - void ensure_diagonal(Dense *mtx) + void ensure_diagonal(Dense* mtx) { for (int i = 0; i < mtx->get_size()[0]; ++i) { mtx->at(i, i) = gko::one(); diff --git a/hip/test/solver/cb_gmres_kernels.cpp b/hip/test/solver/cb_gmres_kernels.cpp index 3d3d6b2c18d..91db227e3ad 100644 --- a/hip/test/solver/cb_gmres_kernels.cpp +++ b/hip/test/solver/cb_gmres_kernels.cpp @@ -95,7 +95,7 @@ class CbGmres : public ::testing::Test { Range3dHelper generate_krylov_helper(gko::dim<3> size) { auto helper = Range3dHelper{ref, size}; - auto &bases = helper.get_bases(); + auto& bases = helper.get_bases(); const auto num_rows = size[0] * size[1]; const auto num_cols = size[2]; auto temp_krylov_bases = gko::test::generate_random_matrix( @@ -191,7 +191,7 @@ class CbGmres : public ::testing::Test { void assert_krylov_bases_near() { gko::Array d_to_host{ref}; - auto &krylov_bases = range_helper.get_bases(); + auto& krylov_bases = range_helper.get_bases(); d_to_host = d_range_helper.get_bases(); const auto tolerance = r::value; using std::abs; diff --git a/include/ginkgo/core/base/abstract_factory.hpp b/include/ginkgo/core/base/abstract_factory.hpp index 3a6535884ce..71aeb7ec4e3 100644 --- a/include/ginkgo/core/base/abstract_factory.hpp +++ b/include/ginkgo/core/base/abstract_factory.hpp @@ -89,7 +89,7 @@ class AbstractFactory * 
@return an instance of AbstractProductType */ template - std::unique_ptr generate(Args &&... args) const + std::unique_ptr generate(Args&&... args) const { auto product = this->generate_impl({std::forward(args)...}); for (auto logger : this->loggers_) { @@ -159,9 +159,9 @@ class EnableDefaultFactory using components_type = typename PolymorphicBase::components_type; template - std::unique_ptr generate(Args &&... args) const + std::unique_ptr generate(Args&&... args) const { - auto product = std::unique_ptr(static_cast( + auto product = std::unique_ptr(static_cast( this->generate_impl({std::forward(args)...}).release())); propagate_loggers(product.get()); return product; @@ -172,7 +172,7 @@ class EnableDefaultFactory * * @return the parameters of the factory */ - const parameters_type &get_parameters() const noexcept + const parameters_type& get_parameters() const noexcept { return parameters_; }; @@ -204,7 +204,7 @@ class EnableDefaultFactory std::is_base_of::value && std::is_base_of::value, void>::type - propagate_loggers(TheType *product) const + propagate_loggers(TheType* product) const { for (auto logger : this->loggers_) { product->add_logger(logger); @@ -223,7 +223,7 @@ class EnableDefaultFactory !std::is_base_of::value || !std::is_base_of::value, void>::type - propagate_loggers(TheType *product) const + propagate_loggers(TheType* product) const {} /** @@ -233,7 +233,7 @@ class EnableDefaultFactory * @param parameters the parameters structure for the factory */ explicit EnableDefaultFactory(std::shared_ptr exec, - const parameters_type ¶meters = {}) + const parameters_type& parameters = {}) : EnablePolymorphicObject( std::move(exec)), parameters_{parameters} diff --git a/include/ginkgo/core/base/array.hpp b/include/ginkgo/core/base/array.hpp index c4aa2323417..794f9589e29 100644 --- a/include/ginkgo/core/base/array.hpp +++ b/include/ginkgo/core/base/array.hpp @@ -62,7 +62,7 @@ namespace detail { */ template void convert_data(std::shared_ptr exec, size_type 
size, - const SourceType *src, TargetType *dst); + const SourceType* src, TargetType* dst); } // namespace detail @@ -165,7 +165,7 @@ class Array { */ template Array(std::shared_ptr exec, size_type num_elems, - value_type *data, DeleterType deleter) + value_type* data, DeleterType deleter) : num_elems_{num_elems}, data_(data, deleter), exec_{exec} {} @@ -180,7 +180,7 @@ class Array { * @param data chunk of memory used to create the array */ Array(std::shared_ptr exec, size_type num_elems, - value_type *data) + value_type* data) : Array(exec, num_elems, data, default_deleter{exec}) {} @@ -229,7 +229,7 @@ class Array { * @param exec the executor where the new array will be created * @param other the Array to copy from */ - Array(std::shared_ptr exec, const Array &other) + Array(std::shared_ptr exec, const Array& other) : Array(exec) { *this = other; @@ -243,7 +243,7 @@ class Array { * * @param other the Array to copy from */ - Array(const Array &other) : Array(other.get_executor(), other) {} + Array(const Array& other) : Array(other.get_executor(), other) {} /** * Moves another array to a different executor. @@ -254,7 +254,7 @@ class Array { * @param exec the executor where the new array will be moved * @param other the Array to move */ - Array(std::shared_ptr exec, Array &&other) : Array(exec) + Array(std::shared_ptr exec, Array&& other) : Array(exec) { *this = std::move(other); } @@ -267,7 +267,7 @@ class Array { * * @param other the Array to move */ - Array(Array &&other) : Array(other.get_executor(), std::move(other)) {} + Array(Array&& other) : Array(other.get_executor(), std::move(other)) {} /** * Creates an Array from existing memory. 
@@ -283,7 +283,7 @@ class Array { * @return an Array constructed from `data` */ static Array view(std::shared_ptr exec, size_type num_elems, - value_type *data) + value_type* data) { return Array{exec, num_elems, data, view_deleter{}}; } @@ -304,7 +304,7 @@ class Array { * * @return this */ - Array &operator=(const Array &other) + Array& operator=(const Array& other) { if (&other == this) { return *this; @@ -358,7 +358,7 @@ class Array { * * @return this */ - Array &operator=(Array &&other) + Array& operator=(Array&& other) { if (&other == this) { return *this; @@ -402,8 +402,8 @@ class Array { * @return this */ template - std::enable_if_t::value, Array> - &operator=(const Array &other) + std::enable_if_t::value, Array>& + operator=(const Array& other) { if (this->exec_ == nullptr) { this->exec_ = other.get_executor(); @@ -421,7 +421,7 @@ class Array { this->num_elems_); } Array tmp{this->exec_}; - const OtherValueType *source = other.get_const_data(); + const OtherValueType* source = other.get_const_data(); // if we are on different executors: copy, then convert if (this->exec_ != other.get_executor()) { tmp = other; @@ -500,7 +500,7 @@ class Array { * @return a pointer to the block of memory used to store the elements of * the Array */ - value_type *get_data() noexcept { return data_.get(); } + value_type* get_data() noexcept { return data_.get(); } /** * Returns a constant pointer to the block of memory used to store the @@ -509,7 +509,7 @@ class Array { * @return a constant pointer to the block of memory used to store the * elements of the Array */ - const value_type *get_const_data() const noexcept { return data_.get(); } + const value_type* get_const_data() const noexcept { return data_.get(); } /** * Returns the Executor associated with the array. 
@@ -576,7 +576,7 @@ namespace detail { template struct temporary_clone_helper> { static std::unique_ptr> create( - std::shared_ptr exec, Array *ptr, bool copy_data) + std::shared_ptr exec, Array* ptr, bool copy_data) { if (copy_data) { return std::make_unique>(std::move(exec), *ptr); @@ -590,7 +590,7 @@ struct temporary_clone_helper> { template struct temporary_clone_helper> { static std::unique_ptr> create( - std::shared_ptr exec, const Array *ptr, bool) + std::shared_ptr exec, const Array* ptr, bool) { return std::make_unique>(std::move(exec), *ptr); } @@ -601,7 +601,7 @@ struct temporary_clone_helper> { template class copy_back_deleter> { public: - using pointer = Array *; + using pointer = Array*; /** * Creates a new deleter object. diff --git a/include/ginkgo/core/base/combination.hpp b/include/ginkgo/core/base/combination.hpp index d7cc9f71cde..0b3155106f5 100644 --- a/include/ginkgo/core/base/combination.hpp +++ b/include/ginkgo/core/base/combination.hpp @@ -67,7 +67,7 @@ class Combination : public EnableLinOp>, * * @return a list of coefficients */ - const std::vector> &get_coefficients() const + const std::vector>& get_coefficients() const noexcept { return coefficients_; @@ -78,7 +78,7 @@ class Combination : public EnableLinOp>, * * @return a list of operators */ - const std::vector> &get_operators() const + const std::vector>& get_operators() const noexcept { return operators_; @@ -131,11 +131,11 @@ class Combination : public EnableLinOp>, coefficients_(coefficient_begin, coefficient_end), operators_(operator_begin, operator_end) { - for (const auto &c : coefficients_) { + for (const auto& c : coefficients_) { GKO_ASSERT_EQUAL_DIMENSIONS(c, dim<2>(1, 1)); } this->set_size(operators_[0]->get_size()); - for (const auto &o : operators_) { + for (const auto& o : operators_) { GKO_ASSERT_EQUAL_DIMENSIONS(o, this->get_size()); } } @@ -152,7 +152,7 @@ class Combination : public EnableLinOp>, */ template explicit Combination(std::shared_ptr coef, - 
std::shared_ptr oper, Rest &&... rest) + std::shared_ptr oper, Rest&&... rest) : Combination(std::forward(rest)...) { GKO_ASSERT_EQUAL_DIMENSIONS(coef, dim<2>(1, 1)); @@ -181,10 +181,10 @@ class Combination : public EnableLinOp>, operators_{oper} {} - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; private: std::vector> coefficients_; @@ -194,8 +194,8 @@ class Combination : public EnableLinOp>, mutable struct cache_struct { cache_struct() = default; ~cache_struct() = default; - cache_struct(const cache_struct &other) {} - cache_struct &operator=(const cache_struct &other) { return *this; } + cache_struct(const cache_struct& other) {} + cache_struct& operator=(const cache_struct& other) { return *this; } std::unique_ptr zero; std::unique_ptr one; diff --git a/include/ginkgo/core/base/composition.hpp b/include/ginkgo/core/base/composition.hpp index 3be5ca30db3..4d5302b3bed 100644 --- a/include/ginkgo/core/base/composition.hpp +++ b/include/ginkgo/core/base/composition.hpp @@ -75,7 +75,7 @@ class Composition : public EnableLinOp>, * * @return a list of operators */ - const std::vector> &get_operators() const + const std::vector>& get_operators() const noexcept { return operators_; @@ -133,7 +133,7 @@ class Composition : public EnableLinOp>, * @param rest remainging operators */ template - explicit Composition(std::shared_ptr oper, Rest &&... rest) + explicit Composition(std::shared_ptr oper, Rest&&... rest) : Composition(std::forward(rest)...) 
{ GKO_ASSERT_CONFORMANT(oper, operators_[0]); @@ -156,10 +156,10 @@ class Composition : public EnableLinOp>, storage_{oper->get_executor()} {} - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; private: std::vector> operators_; @@ -213,7 +213,7 @@ class UseComposition { * Sets the composition with a list of operators */ template - void set_composition(LinOp &&... linop) + void set_composition(LinOp&&... linop) { composition_ = Composition::create(std::forward(linop)...); diff --git a/include/ginkgo/core/base/device.hpp b/include/ginkgo/core/base/device.hpp index 5eaf892789d..9eda0b29488 100644 --- a/include/ginkgo/core/base/device.hpp +++ b/include/ginkgo/core/base/device.hpp @@ -67,7 +67,7 @@ class nvidia_device { * * @return recursive_mutex reference */ - static std::mutex &get_mutex(int i); + static std::mutex& get_mutex(int i); /** * get_num_execs gets the static num_execs reference at i. @@ -76,7 +76,7 @@ class nvidia_device { * * @return int reference */ - static int &get_num_execs(int i); + static int& get_num_execs(int i); static constexpr int max_devices = 64; }; @@ -97,7 +97,7 @@ class amd_device { * * @return recursive_mutex reference */ - static std::mutex &get_mutex(int i); + static std::mutex& get_mutex(int i); /** * get_num_execs gets the static num_execs reference at i. 
@@ -106,7 +106,7 @@ class amd_device { * * @return int reference */ - static int &get_num_execs(int i); + static int& get_num_execs(int i); static constexpr int max_devices = 64; }; diff --git a/include/ginkgo/core/base/dim.hpp b/include/ginkgo/core/base/dim.hpp index 726b899ba60..5d241c482d7 100644 --- a/include/ginkgo/core/base/dim.hpp +++ b/include/ginkgo/core/base/dim.hpp @@ -63,7 +63,7 @@ struct dim { * * @param size the size of each dimension */ - constexpr GKO_ATTRIBUTES dim(const dimension_type &size = dimension_type{}) + constexpr GKO_ATTRIBUTES dim(const dimension_type& size = dimension_type{}) : first_{size}, rest_{size} {} @@ -81,8 +81,8 @@ struct dim { * @param rest other dimensions */ template - constexpr GKO_ATTRIBUTES dim(const dimension_type &first, - const Rest &... rest) + constexpr GKO_ATTRIBUTES dim(const dimension_type& first, + const Rest&... rest) : first_{first}, rest_{static_cast(rest)...} {} @@ -97,8 +97,8 @@ struct dim { * * @return the `dimension`-th dimension */ - constexpr GKO_ATTRIBUTES const dimension_type &operator[]( - const size_type &dimension) const noexcept + constexpr GKO_ATTRIBUTES const dimension_type& operator[]( + const size_type& dimension) const noexcept { return GKO_ASSERT(dimension < dimensionality), dimension == 0 ? first_ : rest_[dimension - 1]; @@ -107,8 +107,8 @@ struct dim { /** * @copydoc operator[]() const */ - GKO_ATTRIBUTES dimension_type &operator[]( - const size_type &dimension) noexcept + GKO_ATTRIBUTES dimension_type& operator[]( + const size_type& dimension) noexcept { return GKO_ASSERT(dimension < dimensionality), dimension == 0 ? first_ : rest_[dimension - 1]; @@ -138,7 +138,7 @@ struct dim { * * @return true if and only if all dimensions of both objects are equal. 
*/ - friend constexpr GKO_ATTRIBUTES bool operator==(const dim &x, const dim &y) + friend constexpr GKO_ATTRIBUTES bool operator==(const dim& x, const dim& y) { return x.first_ == y.first_ && x.rest_ == y.rest_; } @@ -151,7 +151,7 @@ struct dim { * * @return a dim object representing the size of the tensor product `x * y` */ - friend constexpr GKO_ATTRIBUTES dim operator*(const dim &x, const dim &y) + friend constexpr GKO_ATTRIBUTES dim operator*(const dim& x, const dim& y) { return dim(x.first_ * y.first_, x.rest_ * y.rest_); } @@ -164,7 +164,7 @@ struct dim { * * @return a stream object appended with the dim output */ - friend std::ostream &operator<<(std::ostream &os, const dim &x) + friend std::ostream& operator<<(std::ostream& os, const dim& x) { os << "("; x.print_to(os); @@ -173,7 +173,7 @@ struct dim { } private: - void inline print_to(std::ostream &os) const + void inline print_to(std::ostream& os) const { os << first_ << ", "; rest_.print_to(os); @@ -198,17 +198,17 @@ struct dim<1u, DimensionType> { using dimension_type = DimensionType; - constexpr GKO_ATTRIBUTES dim(const dimension_type &size = dimension_type{}) + constexpr GKO_ATTRIBUTES dim(const dimension_type& size = dimension_type{}) : first_{size} {} - constexpr GKO_ATTRIBUTES const dimension_type &operator[]( - const size_type &dimension) const noexcept + constexpr GKO_ATTRIBUTES const dimension_type& operator[]( + const size_type& dimension) const noexcept { return GKO_ASSERT(dimension == 0), first_; } - GKO_ATTRIBUTES dimension_type &operator[](const size_type &dimension) + GKO_ATTRIBUTES dimension_type& operator[](const size_type& dimension) { return GKO_ASSERT(dimension == 0), first_; } @@ -218,17 +218,17 @@ struct dim<1u, DimensionType> { return static_cast(first_); } - friend constexpr GKO_ATTRIBUTES bool operator==(const dim &x, const dim &y) + friend constexpr GKO_ATTRIBUTES bool operator==(const dim& x, const dim& y) { return x.first_ == y.first_; } - friend constexpr GKO_ATTRIBUTES dim 
operator*(const dim &x, const dim &y) + friend constexpr GKO_ATTRIBUTES dim operator*(const dim& x, const dim& y) { return dim(x.first_ * y.first_); } - friend std::ostream &operator<<(std::ostream &os, const dim &x) + friend std::ostream& operator<<(std::ostream& os, const dim& x) { os << "("; x.print_to(os); @@ -237,7 +237,7 @@ struct dim<1u, DimensionType> { } private: - void inline print_to(std::ostream &os) const { os << first_; } + void inline print_to(std::ostream& os) const { os << first_; } dimension_type first_; }; @@ -256,8 +256,8 @@ struct dim<1u, DimensionType> { */ template constexpr GKO_ATTRIBUTES GKO_INLINE bool operator!=( - const dim &x, - const dim &y) + const dim& x, + const dim& y) { return !(x == y); } @@ -274,7 +274,7 @@ constexpr GKO_ATTRIBUTES GKO_INLINE bool operator!=( */ template constexpr GKO_ATTRIBUTES GKO_INLINE dim<2, DimensionType> transpose( - const dim<2, DimensionType> &dimensions) noexcept + const dim<2, DimensionType>& dimensions) noexcept { return {dimensions[1], dimensions[0]}; } diff --git a/include/ginkgo/core/base/exception.hpp b/include/ginkgo/core/base/exception.hpp index 7556602f13b..6a0a874f0d2 100644 --- a/include/ginkgo/core/base/exception.hpp +++ b/include/ginkgo/core/base/exception.hpp @@ -92,7 +92,7 @@ class Error : public std::exception { * @param line The source code line number where the error occurred * @param what The error message */ - Error(const std::string &file, int line, const std::string &what) + Error(const std::string& file, int line, const std::string& what) : what_(file + ":" + std::to_string(line) + ": " + what) {} @@ -100,7 +100,7 @@ class Error : public std::exception { * Returns a human-readable string with a more detailed description of the * error. 
*/ - virtual const char *what() const noexcept override { return what_.c_str(); } + virtual const char* what() const noexcept override { return what_.c_str(); } private: const std::string what_; @@ -120,7 +120,7 @@ class NotImplemented : public Error { * @param line The source code line number where the error occurred * @param func The name of the not-yet implemented function */ - NotImplemented(const std::string &file, int line, const std::string &func) + NotImplemented(const std::string& file, int line, const std::string& func) : Error(file, line, func + " is not implemented") {} }; @@ -140,8 +140,8 @@ class NotCompiled : public Error { * @param func The name of the function that has not been compiled * @param module The name of the module which contains the function */ - NotCompiled(const std::string &file, int line, const std::string &func, - const std::string &module) + NotCompiled(const std::string& file, int line, const std::string& func, + const std::string& module) : Error(file, line, "feature " + func + " is part of the " + module + " module, which is not compiled on this system") @@ -164,8 +164,8 @@ class NotSupported : public Error { * @param obj_type The object type on which the requested operation cannot be performed. 
*/ - NotSupported(const std::string &file, int line, const std::string &func, - const std::string &obj_type) + NotSupported(const std::string& file, int line, const std::string& func, + const std::string& obj_type) : Error(file, line, "Operation " + func + " does not support parameters of type " + obj_type) @@ -186,7 +186,7 @@ class CudaError : public Error { * @param func The name of the CUDA routine that failed * @param error_code The resulting CUDA error code */ - CudaError(const std::string &file, int line, const std::string &func, + CudaError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -209,7 +209,7 @@ class CublasError : public Error { * @param func The name of the cuBLAS routine that failed * @param error_code The resulting cuBLAS error code */ - CublasError(const std::string &file, int line, const std::string &func, + CublasError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -232,7 +232,7 @@ class CurandError : public Error { * @param func The name of the cuRAND routine that failed * @param error_code The resulting cuRAND error code */ - CurandError(const std::string &file, int line, const std::string &func, + CurandError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -255,7 +255,7 @@ class CusparseError : public Error { * @param func The name of the cuSPARSE routine that failed * @param error_code The resulting cuSPARSE error code */ - CusparseError(const std::string &file, int line, const std::string &func, + CusparseError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -278,7 +278,7 @@ class HipError : public Error { * @param func The name of the HIP routine that failed * @param 
error_code The resulting HIP error code */ - HipError(const std::string &file, int line, const std::string &func, + HipError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -301,7 +301,7 @@ class HipblasError : public Error { * @param func The name of the hipBLAS routine that failed * @param error_code The resulting hipBLAS error code */ - HipblasError(const std::string &file, int line, const std::string &func, + HipblasError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -324,7 +324,7 @@ class HiprandError : public Error { * @param func The name of the hipRAND routine that failed * @param error_code The resulting hipRAND error code */ - HiprandError(const std::string &file, int line, const std::string &func, + HiprandError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -348,7 +348,7 @@ class HipsparseError : public Error { * @param func The name of the hipSPARSE routine that failed * @param error_code The resulting hipSPARSE error code */ - HipsparseError(const std::string &file, int line, const std::string &func, + HipsparseError(const std::string& file, int line, const std::string& func, int64 error_code) : Error(file, line, func + ": " + get_error(error_code)) {} @@ -378,11 +378,11 @@ class DimensionMismatch : public Error { * @param second_cols The input dimension of the second operator * @param clarification An additional message describing the error further */ - DimensionMismatch(const std::string &file, int line, - const std::string &func, const std::string &first_name, + DimensionMismatch(const std::string& file, int line, + const std::string& func, const std::string& first_name, size_type first_rows, size_type first_cols, - const std::string &second_name, size_type second_rows, - 
size_type second_cols, const std::string &clarification) + const std::string& second_name, size_type second_rows, + size_type second_cols, const std::string& clarification) : Error(file, line, func + ": attempting to combine operators " + first_name + " [" + std::to_string(first_rows) + " x " + @@ -410,9 +410,9 @@ class BadDimension : public Error { * @param op_num_cols The column dimension of the operator * @param clarification An additional message further describing the error */ - BadDimension(const std::string &file, int line, const std::string &func, - const std::string &op_name, size_type op_num_rows, - size_type op_num_cols, const std::string &clarification) + BadDimension(const std::string& file, int line, const std::string& func, + const std::string& op_name, size_type op_num_rows, + size_type op_num_cols, const std::string& clarification) : Error(file, line, func + ": Object " + op_name + " has dimensions [" + std::to_string(op_num_rows) + " x " + @@ -436,7 +436,7 @@ class BlockSizeError : public Error { * @param block_size Size of small dense blocks in a matrix * @param size The size that is not exactly divided by the block size */ - BlockSizeError(const std::string &file, const int line, + BlockSizeError(const std::string& file, const int line, const int block_size, const IndexType size) : Error(file, line, "block size = " + std::to_string(block_size) + @@ -460,9 +460,9 @@ class ValueMismatch : public Error { * @param val2 The second value to be compared. 
* @param clarification An additional message further describing the error */ - ValueMismatch(const std::string &file, int line, const std::string &func, + ValueMismatch(const std::string& file, int line, const std::string& func, size_type val1, size_type val2, - const std::string &clarification) + const std::string& clarification) : Error(file, line, func + ": Value mismatch : " + std::to_string(val1) + " and " + std::to_string(val2) + " : " + clarification) @@ -483,8 +483,8 @@ class AllocationError : public Error { * @param device The device on which the error occurred * @param bytes The size of the memory block whose allocation failed. */ - AllocationError(const std::string &file, int line, - const std::string &device, size_type bytes) + AllocationError(const std::string& file, int line, + const std::string& device, size_type bytes) : Error(file, line, device + ": failed to allocate memory block of " + std::to_string(bytes) + "B") @@ -506,7 +506,7 @@ class OutOfBoundsError : public Error { * @param index The position that was accessed * @param bound The first out-of-bound index */ - OutOfBoundsError(const std::string &file, int line, size_type index, + OutOfBoundsError(const std::string& file, int line, size_type index, size_type bound) : Error(file, line, "trying to access index " + std::to_string(index) + @@ -529,8 +529,8 @@ class StreamError : public Error { * @param func The name of the function that tried to access the file * @param message The error message */ - StreamError(const std::string &file, int line, const std::string &func, - const std::string &message) + StreamError(const std::string& file, int line, const std::string& func, + const std::string& message) : Error(file, line, func + ": " + message) {} }; @@ -549,7 +549,7 @@ class KernelNotFound : public Error { * @param line The source code line number where the error occurred * @param func The name of the function where the error occurred */ - KernelNotFound(const std::string &file, int line, const 
std::string &func) + KernelNotFound(const std::string& file, int line, const std::string& func) : Error(file, line, func + ": unable to find an eligible kernel") {} }; diff --git a/include/ginkgo/core/base/exception_helpers.hpp b/include/ginkgo/core/base/exception_helpers.hpp index 7a34a0835a0..e38799d895d 100644 --- a/include/ginkgo/core/base/exception_helpers.hpp +++ b/include/ginkgo/core/base/exception_helpers.hpp @@ -94,21 +94,21 @@ namespace detail { template struct dynamic_type_helper { - static const std::type_info &get(const T &obj) { return typeid(obj); } + static const std::type_info& get(const T& obj) { return typeid(obj); } }; template struct dynamic_type_helper::value || have_ownership()>::type> { - static const std::type_info &get(const T &obj) + static const std::type_info& get(const T& obj) { return obj ? typeid(*obj) : typeid(nullptr); } }; template -const std::type_info &get_dynamic_type(const T &obj) +const std::type_info& get_dynamic_type(const T& obj) { return dynamic_type_helper::get(obj); } @@ -139,12 +139,12 @@ namespace detail { template -inline dim<2> get_size(const T &op) +inline dim<2> get_size(const T& op) { return op->get_size(); } -inline dim<2> get_size(const dim<2> &size) { return size; } +inline dim<2> get_size(const dim<2>& size) { return size; } } // namespace detail @@ -471,8 +471,8 @@ namespace detail { template -inline T ensure_allocated_impl(T ptr, const std::string &file, int line, - const std::string &dev, size_type size) +inline T ensure_allocated_impl(T ptr, const std::string& file, int line, + const std::string& dev, size_type size) { if (ptr == nullptr) { throw AllocationError(file, line, dev, size); diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index b6770f92310..c176e16583a 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -273,7 +273,7 @@ class Operation { * * @return the operation's name */ - virtual const char *get_name() 
const noexcept; + virtual const char* get_name() const noexcept; }; @@ -298,11 +298,11 @@ class RegisteredOperation : public Operation { * @param name the name to be used for this operation * @param op a functor object which will be called with the executor. */ - RegisteredOperation(const char *name, int num_params, Closure op) + RegisteredOperation(const char* name, int num_params, Closure op) : name_(name), num_params_(num_params), op_(std::move(op)) {} - const char *get_name() const noexcept override + const char* get_name() const noexcept override { static auto name = [this] { std::ostringstream oss; @@ -338,14 +338,14 @@ class RegisteredOperation : public Operation { } private: - const char *name_; + const char* name_; int num_params_; Closure op_; }; template -RegisteredOperation make_register_operation(const char *name, +RegisteredOperation make_register_operation(const char* name, int num_params, Closure op) { return RegisteredOperation{name, num_params, std::move(op)}; @@ -428,7 +428,7 @@ RegisteredOperation make_register_operation(const char *name, */ #define GKO_REGISTER_OPERATION(_name, _kernel) \ template \ - auto make_##_name(Args &&... args) \ + auto make_##_name(Args&&... args) \ { \ return ::gko::detail::make_register_operation( \ #_name, sizeof...(Args), [&args...](auto exec) { \ @@ -583,17 +583,17 @@ class Executor : public log::EnableLogging { virtual ~Executor() = default; Executor() = default; - Executor(Executor &) = delete; - Executor(Executor &&) = default; - Executor &operator=(Executor &) = delete; - Executor &operator=(Executor &&) = default; + Executor(Executor&) = delete; + Executor(Executor&&) = default; + Executor& operator=(Executor&) = delete; + Executor& operator=(Executor&&) = default; /** * Runs the specified Operation using this Executor. 
* * @param op the operation to run */ - virtual void run(const Operation &op) const = 0; + virtual void run(const Operation& op) const = 0; /** * Runs one of the passed in functors, depending on the Executor type. @@ -609,8 +609,8 @@ class Executor : public log::EnableLogging { */ template - void run(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, - const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) const + void run(const ClosureOmp& op_omp, const ClosureCuda& op_cuda, + const ClosureHip& op_hip, const ClosureDpcpp& op_dpcpp) const { LambdaOperation op( op_omp, op_cuda, op_hip, op_dpcpp); @@ -629,11 +629,11 @@ class Executor : public log::EnableLogging { * @return pointer to allocated memory */ template - T *alloc(size_type num_elems) const + T* alloc(size_type num_elems) const { this->template log( this, num_elems * sizeof(T)); - T *allocated = static_cast(this->raw_alloc(num_elems * sizeof(T))); + T* allocated = static_cast(this->raw_alloc(num_elems * sizeof(T))); this->template log( this, num_elems * sizeof(T), reinterpret_cast(allocated)); return allocated; @@ -646,7 +646,7 @@ class Executor : public log::EnableLogging { * * @param ptr pointer to the allocated memory block */ - void free(void *ptr) const noexcept + void free(void* ptr) const noexcept { this->template log( this, reinterpret_cast(ptr)); @@ -668,8 +668,8 @@ class Executor : public log::EnableLogging { * where the data will be copied to */ template - void copy_from(const Executor *src_exec, size_type num_elems, - const T *src_ptr, T *dest_ptr) const + void copy_from(const Executor* src_exec, size_type num_elems, + const T* src_ptr, T* dest_ptr) const { this->template log( src_exec, this, reinterpret_cast(src_ptr), @@ -677,7 +677,7 @@ class Executor : public log::EnableLogging { try { this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr, dest_ptr); - } catch (NotSupported &) { + } catch (NotSupported&) { #if (GKO_VERBOSE_LEVEL >= 1) && !defined(NDEBUG) // Unoptimized copy. 
Try to go through the masters. // output to log when verbose >= 1 and debug build @@ -686,7 +686,7 @@ class Executor : public log::EnableLogging { #endif auto src_master = src_exec->get_master().get(); if (num_elems > 0 && src_master != src_exec) { - auto *master_ptr = src_exec->get_master()->alloc(num_elems); + auto* master_ptr = src_exec->get_master()->alloc(num_elems); src_master->copy_from(src_exec, num_elems, src_ptr, master_ptr); this->copy_from(src_master, num_elems, master_ptr, dest_ptr); @@ -710,7 +710,7 @@ class Executor : public log::EnableLogging { * where the data will be copied to */ template - void copy(size_type num_elems, const T *src_ptr, T *dest_ptr) const + void copy(size_type num_elems, const T* src_ptr, T* dest_ptr) const { this->copy_from(this, num_elems, src_ptr, dest_ptr); } @@ -725,7 +725,7 @@ class Executor : public log::EnableLogging { * @return the value stored at ptr */ template - T copy_val_to_host(const T *ptr) const + T copy_val_to_host(const T* ptr) const { T out{}; this->get_master()->copy_from(this, 1, ptr, &out); @@ -755,7 +755,7 @@ class Executor : public log::EnableLogging { * * @return whether the executors this and other share the same memory. */ - bool memory_accessible(const std::shared_ptr &other) const + bool memory_accessible(const std::shared_ptr& other) const { return this->verify_memory_from(other.get()); } @@ -880,7 +880,7 @@ class Executor : public log::EnableLogging { * * @return the exec_info struct */ - const exec_info &get_exec_info() const { return this->exec_info_; } + const exec_info& get_exec_info() const { return this->exec_info_; } /** * Allocates raw memory in this Executor. @@ -891,7 +891,7 @@ class Executor : public log::EnableLogging { * * @return raw pointer to allocated memory */ - virtual void *raw_alloc(size_type size) const = 0; + virtual void* raw_alloc(size_type size) const = 0; /** * Frees memory previously allocated with Executor::alloc(). 
@@ -900,7 +900,7 @@ class Executor : public log::EnableLogging { * * @param ptr pointer to the allocated memory block */ - virtual void raw_free(void *ptr) const noexcept = 0; + virtual void raw_free(void* ptr) const noexcept = 0; /** * Copies raw data from another Executor. @@ -912,8 +912,8 @@ class Executor : public log::EnableLogging { * @param dest_ptr pointer to an allocated block of memory where the data * will be copied to */ - virtual void raw_copy_from(const Executor *src_exec, size_type n_bytes, - const void *src_ptr, void *dest_ptr) const = 0; + virtual void raw_copy_from(const Executor* src_exec, size_type n_bytes, + const void* src_ptr, void* dest_ptr) const = 0; /** * @internal @@ -925,8 +925,8 @@ class Executor : public log::EnableLogging { * @param _exec_type the Executor subclass */ #define GKO_ENABLE_RAW_COPY_TO(_exec_type, ...) \ - virtual void raw_copy_to(const _exec_type *dest_exec, size_type n_bytes, \ - const void *src_ptr, void *dest_ptr) const = 0 + virtual void raw_copy_to(const _exec_type* dest_exec, size_type n_bytes, \ + const void* src_ptr, void* dest_ptr) const = 0 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_RAW_COPY_TO); @@ -939,7 +939,7 @@ class Executor : public log::EnableLogging { * * @return whether this executor and src_exec share the same memory. */ - virtual bool verify_memory_from(const Executor *src_exec) const = 0; + virtual bool verify_memory_from(const Executor* src_exec) const = 0; /** * @internal @@ -951,7 +951,7 @@ class Executor : public log::EnableLogging { * @param _exec_type the Executor subclass */ #define GKO_ENABLE_VERIFY_MEMORY_TO(_exec_type, ...) \ - virtual bool verify_memory_to(const _exec_type *dest_exec) const = 0 + virtual bool verify_memory_to(const _exec_type* dest_exec) const = 0 GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_ENABLE_VERIFY_MEMORY_TO); @@ -965,14 +965,14 @@ class Executor : public log::EnableLogging { * * @param mach_topo the machine topology object. 
*/ - virtual void populate_exec_info(const MachineTopology *mach_topo) = 0; + virtual void populate_exec_info(const MachineTopology* mach_topo) = 0; /** * Gets the modifiable exec info object * * @return the pointer to the exec_info object */ - exec_info &get_exec_info() { return this->exec_info_; } + exec_info& get_exec_info() { return this->exec_info_; } exec_info exec_info_; @@ -1004,8 +1004,8 @@ class Executor : public log::EnableLogging { * @param op_dpcpp a functor object which will be called by * DpcppExecutor */ - LambdaOperation(const ClosureOmp &op_omp, const ClosureCuda &op_cuda, - const ClosureHip &op_hip, const ClosureDpcpp &op_dpcpp) + LambdaOperation(const ClosureOmp& op_omp, const ClosureCuda& op_cuda, + const ClosureHip& op_hip, const ClosureDpcpp& op_dpcpp) : op_omp_(op_omp), op_cuda_(op_cuda), op_hip_(op_hip), @@ -1052,7 +1052,7 @@ class Executor : public log::EnableLogging { template class executor_deleter { public: - using pointer = T *; + using pointer = T*; /** * Creates a new deleter. 
@@ -1110,7 +1110,7 @@ class ExecutorBase : public Executor { friend class ReferenceExecutor; public: - void run(const Operation &op) const override + void run(const Operation& op) const override { this->template log(this, &op); op.run(self()->shared_from_this()); @@ -1118,26 +1118,26 @@ class ExecutorBase : public Executor { } protected: - void raw_copy_from(const Executor *src_exec, size_type n_bytes, - const void *src_ptr, void *dest_ptr) const override + void raw_copy_from(const Executor* src_exec, size_type n_bytes, + const void* src_ptr, void* dest_ptr) const override { src_exec->raw_copy_to(self(), n_bytes, src_ptr, dest_ptr); } - virtual bool verify_memory_from(const Executor *src_exec) const override + virtual bool verify_memory_from(const Executor* src_exec) const override { return src_exec->verify_memory_to(self()); } private: - ConcreteExecutor *self() noexcept + ConcreteExecutor* self() noexcept { - return static_cast(this); + return static_cast(this); } - const ConcreteExecutor *self() const noexcept + const ConcreteExecutor* self() const noexcept { - return static_cast(this); + return static_cast(this); } }; @@ -1185,12 +1185,12 @@ class EnableDeviceReset { #define GKO_OVERRIDE_RAW_COPY_TO(_executor_type, ...) 
\ - void raw_copy_to(const _executor_type *dest_exec, size_type n_bytes, \ - const void *src_ptr, void *dest_ptr) const override + void raw_copy_to(const _executor_type* dest_exec, size_type n_bytes, \ + const void* src_ptr, void* dest_ptr) const override #define GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(dest_, bool_) \ - virtual bool verify_memory_to(const dest_ *other) const override \ + virtual bool verify_memory_to(const dest_* other) const override \ { \ return bool_; \ } \ @@ -1241,11 +1241,11 @@ class OmpExecutor : public detail::ExecutorBase, this->OmpExecutor::populate_exec_info(MachineTopology::get_instance()); } - void populate_exec_info(const MachineTopology *mach_topo) override; + void populate_exec_info(const MachineTopology* mach_topo) override; - void *raw_alloc(size_type size) const override; + void* raw_alloc(size_type size) const override; - void raw_free(void *ptr) const noexcept override; + void raw_free(void* ptr) const noexcept override; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); @@ -1257,7 +1257,7 @@ class OmpExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); - bool verify_memory_to(const DpcppExecutor *dest_exec) const override; + bool verify_memory_to(const DpcppExecutor* dest_exec) const override; }; @@ -1282,7 +1282,7 @@ class ReferenceExecutor : public OmpExecutor { return std::shared_ptr(new ReferenceExecutor()); } - void run(const Operation &op) const override + void run(const Operation& op) const override { this->template log(this, &op); op.run(std::static_pointer_cast( @@ -1297,14 +1297,14 @@ class ReferenceExecutor : public OmpExecutor { MachineTopology::get_instance()); } - void populate_exec_info(const MachineTopology *) override + void populate_exec_info(const MachineTopology*) override { this->get_exec_info().device_id = -1; this->get_exec_info().num_computing_units = 1; this->get_exec_info().num_pu_per_cu = 1; } - bool verify_memory_from(const Executor *src_exec) const 
override + bool verify_memory_from(const Executor* src_exec) const override { return src_exec->verify_memory_to(this); } @@ -1362,7 +1362,7 @@ class CudaExecutor : public detail::ExecutorBase, void synchronize() const override; - void run(const Operation &op) const override; + void run(const Operation& op) const override; /** * Get the CUDA device id of the device associated to this executor. @@ -1431,14 +1431,14 @@ class CudaExecutor : public detail::ExecutorBase, * * @return the cublas handle (cublasContext*) for this executor */ - cublasContext *get_cublas_handle() const { return cublas_handle_.get(); } + cublasContext* get_cublas_handle() const { return cublas_handle_.get(); } /** * Get the cusparse handle for this executor * * @return the cusparse handle (cusparseContext*) for this executor */ - cusparseContext *get_cusparse_handle() const + cusparseContext* get_cusparse_handle() const { return cusparse_handle_.get(); } @@ -1489,9 +1489,9 @@ class CudaExecutor : public detail::ExecutorBase, this->init_handles(); } - void *raw_alloc(size_type size) const override; + void* raw_alloc(size_type size) const override; - void raw_free(void *ptr) const noexcept override; + void raw_free(void* ptr) const noexcept override; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); @@ -1501,9 +1501,9 @@ class CudaExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); - bool verify_memory_to(const HipExecutor *dest_exec) const override; + bool verify_memory_to(const HipExecutor* dest_exec) const override; - bool verify_memory_to(const CudaExecutor *dest_exec) const override; + bool verify_memory_to(const CudaExecutor* dest_exec) const override; static void increase_num_execs(unsigned device_id); @@ -1511,13 +1511,13 @@ class CudaExecutor : public detail::ExecutorBase, static unsigned get_num_execs(unsigned device_id); - void populate_exec_info(const MachineTopology *mach_topo) override; + void populate_exec_info(const 
MachineTopology* mach_topo) override; private: std::shared_ptr master_; template - using handle_manager = std::unique_ptr>; + using handle_manager = std::unique_ptr>; handle_manager cublas_handle_; handle_manager cusparse_handle_; @@ -1566,7 +1566,7 @@ class HipExecutor : public detail::ExecutorBase, void synchronize() const override; - void run(const Operation &op) const override; + void run(const Operation& op) const override; /** * Get the HIP device id of the device associated to this executor. @@ -1635,14 +1635,14 @@ class HipExecutor : public detail::ExecutorBase, * * @return the hipblas handle (hipblasContext*) for this executor */ - hipblasContext *get_hipblas_handle() const { return hipblas_handle_.get(); } + hipblasContext* get_hipblas_handle() const { return hipblas_handle_.get(); } /** * Get the hipsparse handle for this executor * * @return the hipsparse handle (hipsparseContext*) for this executor */ - hipsparseContext *get_hipsparse_handle() const + hipsparseContext* get_hipsparse_handle() const { return hipsparse_handle_.get(); } @@ -1693,9 +1693,9 @@ class HipExecutor : public detail::ExecutorBase, this->init_handles(); } - void *raw_alloc(size_type size) const override; + void* raw_alloc(size_type size) const override; - void raw_free(void *ptr) const noexcept override; + void raw_free(void* ptr) const noexcept override; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); @@ -1705,9 +1705,9 @@ class HipExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); - bool verify_memory_to(const CudaExecutor *dest_exec) const override; + bool verify_memory_to(const CudaExecutor* dest_exec) const override; - bool verify_memory_to(const HipExecutor *dest_exec) const override; + bool verify_memory_to(const HipExecutor* dest_exec) const override; static void increase_num_execs(int device_id); @@ -1715,13 +1715,13 @@ class HipExecutor : public detail::ExecutorBase, static int get_num_execs(int device_id); - void 
populate_exec_info(const MachineTopology *mach_topo) override; + void populate_exec_info(const MachineTopology* mach_topo) override; private: std::shared_ptr master_; template - using handle_manager = std::unique_ptr>; + using handle_manager = std::unique_ptr>; handle_manager hipblas_handle_; handle_manager hipsparse_handle_; @@ -1766,7 +1766,7 @@ class DpcppExecutor : public detail::ExecutorBase, void synchronize() const override; - void run(const Operation &op) const override; + void run(const Operation& op) const override; /** * Get the DPCPP device id of the device associated to this executor. @@ -1778,7 +1778,7 @@ class DpcppExecutor : public detail::ExecutorBase, return this->get_exec_info().device_id; } - ::cl::sycl::queue *get_queue() const { return queue_.get(); } + ::cl::sycl::queue* get_queue() const { return queue_.get(); } /** * Get the number of devices present on the system. @@ -1794,7 +1794,7 @@ class DpcppExecutor : public detail::ExecutorBase, * * @return the available subgroup sizes for this device */ - const std::vector &get_subgroup_sizes() const noexcept + const std::vector& get_subgroup_sizes() const noexcept { return this->get_exec_info().subgroup_sizes; } @@ -1814,7 +1814,7 @@ class DpcppExecutor : public detail::ExecutorBase, * * @return the maximum work item sizes */ - const std::vector &get_max_workitem_sizes() const noexcept + const std::vector& get_max_workitem_sizes() const noexcept { return this->get_exec_info().max_workitem_sizes; } @@ -1857,17 +1857,17 @@ class DpcppExecutor : public detail::ExecutorBase, : master_(master) { std::for_each(device_type.begin(), device_type.end(), - [](char &c) { c = std::tolower(c); }); + [](char& c) { c = std::tolower(c); }); this->get_exec_info().device_type = std::string(device_type); this->get_exec_info().device_id = device_id; this->set_device_property(); } - void populate_exec_info(const MachineTopology *mach_topo) override; + void populate_exec_info(const MachineTopology* mach_topo) override; 
- void *raw_alloc(size_type size) const override; + void* raw_alloc(size_type size) const override; - void raw_free(void *ptr) const noexcept override; + void raw_free(void* ptr) const noexcept override; GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_OVERRIDE_RAW_COPY_TO); @@ -1877,15 +1877,15 @@ class DpcppExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); - bool verify_memory_to(const OmpExecutor *dest_exec) const override; + bool verify_memory_to(const OmpExecutor* dest_exec) const override; - bool verify_memory_to(const DpcppExecutor *dest_exec) const override; + bool verify_memory_to(const DpcppExecutor* dest_exec) const override; private: std::shared_ptr master_; template - using queue_manager = std::unique_ptr>; + using queue_manager = std::unique_ptr>; queue_manager<::cl::sycl::queue> queue_; }; diff --git a/include/ginkgo/core/base/lin_op.hpp b/include/ginkgo/core/base/lin_op.hpp index 50153c78ecb..540fd6ebad3 100644 --- a/include/ginkgo/core/base/lin_op.hpp +++ b/include/ginkgo/core/base/lin_op.hpp @@ -154,7 +154,7 @@ class LinOp : public EnableAbstractPolymorphicObject { * * @return this */ - LinOp *apply(const LinOp *b, LinOp *x) + LinOp* apply(const LinOp* b, LinOp* x) { this->template log(this, b, x); this->validate_application_parameters(b, x); @@ -168,7 +168,7 @@ class LinOp : public EnableAbstractPolymorphicObject { /** * @copydoc apply(const LinOp *, LinOp *) */ - const LinOp *apply(const LinOp *b, LinOp *x) const + const LinOp* apply(const LinOp* b, LinOp* x) const { this->template log(this, b, x); this->validate_application_parameters(b, x); @@ -189,8 +189,8 @@ class LinOp : public EnableAbstractPolymorphicObject { * * @return this */ - LinOp *apply(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) + LinOp* apply(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) { this->template log( this, alpha, b, beta, x); @@ -208,8 +208,8 @@ class LinOp : public 
EnableAbstractPolymorphicObject { /** * @copydoc apply(const LinOp *, const LinOp *, const LinOp *, LinOp *) */ - const LinOp *apply(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const + const LinOp* apply(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const { this->template log( this, alpha, b, beta, x); @@ -229,7 +229,7 @@ class LinOp : public EnableAbstractPolymorphicObject { * * @return size of the operator */ - const dim<2> &get_size() const noexcept { return size_; } + const dim<2>& get_size() const noexcept { return size_; } /** * Returns true if the linear operator uses the data given in x as @@ -248,7 +248,7 @@ class LinOp : public EnableAbstractPolymorphicObject { * @param size the size of the operator */ explicit LinOp(std::shared_ptr exec, - const dim<2> &size = dim<2>{}) + const dim<2>& size = dim<2>{}) : EnableAbstractPolymorphicObject(exec), size_{size} {} @@ -257,7 +257,7 @@ class LinOp : public EnableAbstractPolymorphicObject { * * @param value the new size of the operator */ - void set_size(const dim<2> &value) noexcept { size_ = value; } + void set_size(const dim<2>& value) noexcept { size_ = value; } /** * Implementers of LinOp should override this function instead @@ -268,7 +268,7 @@ class LinOp : public EnableAbstractPolymorphicObject { * @param b the input vector(s) on which the operator is applied * @param x the output vector(s) where the result is stored */ - virtual void apply_impl(const LinOp *b, LinOp *x) const = 0; + virtual void apply_impl(const LinOp* b, LinOp* x) const = 0; /** * Implementers of LinOp should override this function instead @@ -279,8 +279,8 @@ class LinOp : public EnableAbstractPolymorphicObject { * @param beta scaling of the input x * @param x output vector(s) */ - virtual void apply_impl(const LinOp *alpha, const LinOp *b, - const LinOp *beta, LinOp *x) const = 0; + virtual void apply_impl(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const = 0; /** * 
Throws a DimensionMismatch exception if the parameters to `apply` are of @@ -289,7 +289,7 @@ class LinOp : public EnableAbstractPolymorphicObject { * @param b vector(s) on which the operator is applied * @param x output vector(s) */ - void validate_application_parameters(const LinOp *b, const LinOp *x) const + void validate_application_parameters(const LinOp* b, const LinOp* x) const { GKO_ASSERT_CONFORMANT(this, b); GKO_ASSERT_EQUAL_ROWS(this, x); @@ -305,9 +305,9 @@ class LinOp : public EnableAbstractPolymorphicObject { * @param beta scaling of the input x * @param x output vector(s) */ - void validate_application_parameters(const LinOp *alpha, const LinOp *b, - const LinOp *beta, - const LinOp *x) const + void validate_application_parameters(const LinOp* alpha, const LinOp* b, + const LinOp* beta, + const LinOp* x) const { this->validate_application_parameters(b, x); GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1)); @@ -482,7 +482,7 @@ class Permutable { * @return a pointer to the new permuted object */ virtual std::unique_ptr permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { return as(this->row_permute(permutation_indices)) ->column_permute(permutation_indices); @@ -500,7 +500,7 @@ class Permutable { * @return a pointer to the new permuted object */ virtual std::unique_ptr inverse_permute( - const Array *permutation_indices) const + const Array* permutation_indices) const { return as(this->inverse_row_permute(permutation_indices)) ->inverse_column_permute(permutation_indices); @@ -517,7 +517,7 @@ class Permutable { * @return a pointer to the new permuted object */ virtual std::unique_ptr row_permute( - const Array *permutation_indices) const = 0; + const Array* permutation_indices) const = 0; /** * Returns a LinOp representing the column permutation of the Permutable @@ -531,7 +531,7 @@ class Permutable { * @return a pointer to the new column permuted object */ virtual std::unique_ptr column_permute( - const Array 
*permutation_indices) const = 0; + const Array* permutation_indices) const = 0; /** * Returns a LinOp representing the row permutation of the inverse permuted @@ -544,7 +544,7 @@ class Permutable { * @return a pointer to the new inverse permuted object */ virtual std::unique_ptr inverse_row_permute( - const Array *permutation_indices) const = 0; + const Array* permutation_indices) const = 0; /** * Returns a LinOp representing the row permutation of the inverse permuted @@ -558,7 +558,7 @@ class Permutable { * @return a pointer to the new inverse permuted object */ virtual std::unique_ptr inverse_column_permute( - const Array *permutation_indices) const = 0; + const Array* permutation_indices) const = 0; }; @@ -581,14 +581,14 @@ class ReadableFromMatrixData { * * @param data the matrix_data structure */ - virtual void read(const matrix_data &data) = 0; + virtual void read(const matrix_data& data) = 0; /** * Reads a matrix from a matrix_assembly_data structure. * * @param data the matrix_assembly_data structure */ - void read(const matrix_assembly_data &data) + void read(const matrix_assembly_data& data) { this->read(data.get_ordered_data()); } @@ -614,7 +614,7 @@ class WritableToMatrixData { * * @param data the matrix_data structure */ - virtual void write(matrix_data &data) const = 0; + virtual void write(matrix_data& data) const = 0; }; @@ -794,7 +794,7 @@ class EnableLinOp using EnablePolymorphicObject::EnablePolymorphicObject; - const ConcreteLinOp *apply(const LinOp *b, LinOp *x) const + const ConcreteLinOp* apply(const LinOp* b, LinOp* x) const { this->template log(this, b, x); this->validate_application_parameters(b, x); @@ -805,7 +805,7 @@ class EnableLinOp return self(); } - ConcreteLinOp *apply(const LinOp *b, LinOp *x) + ConcreteLinOp* apply(const LinOp* b, LinOp* x) { this->template log(this, b, x); this->validate_application_parameters(b, x); @@ -816,8 +816,8 @@ class EnableLinOp return self(); } - const ConcreteLinOp *apply(const LinOp *alpha, const 
LinOp *b, - const LinOp *beta, LinOp *x) const + const ConcreteLinOp* apply(const LinOp* alpha, const LinOp* b, + const LinOp* beta, LinOp* x) const { this->template log( this, alpha, b, beta, x); @@ -832,8 +832,8 @@ class EnableLinOp return self(); } - ConcreteLinOp *apply(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) + ConcreteLinOp* apply(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) { this->template log( this, alpha, b, beta, x); @@ -974,7 +974,7 @@ public: \ */ #define GKO_ENABLE_LIN_OP_FACTORY(_lin_op, _parameters_name, _factory_name) \ public: \ - const _parameters_name##_type &get_##_parameters_name() const \ + const _parameters_name##_type& get_##_parameters_name() const \ { \ return _parameters_name##_; \ } \ @@ -992,7 +992,7 @@ public: \ std::move(exec)) \ {} \ explicit _factory_name(std::shared_ptr exec, \ - const _parameters_name##_type ¶meters) \ + const _parameters_name##_type& parameters) \ : ::gko::EnableDefaultLinOpFactory<_factory_name, _lin_op, \ _parameters_name##_type>( \ std::move(exec), parameters) \ @@ -1046,8 +1046,8 @@ public: \ mutable _name{__VA_ARGS__}; \ \ template \ - auto with_##_name(Args &&... _value) \ - const->const std::decay_t & \ + auto with_##_name(Args&&... _value) \ + const->const std::decay_t& \ { \ using type = decltype(this->_name); \ this->_name = type{std::forward(_value)...}; \ @@ -1097,8 +1097,8 @@ public: \ mutable _name{__VA_ARGS__}; \ \ template \ - auto with_##_name(Args &&... _value) \ - const->const std::decay_t & \ + auto with_##_name(Args&&... 
_value) \ + const->const std::decay_t& \ { \ GKO_NOT_IMPLEMENTED; \ return *this; \ @@ -1111,8 +1111,8 @@ public: \ mutable _name{_default}; \ \ template \ - auto with_##_name(Arg &&_value) \ - const->const std::decay_t & \ + auto with_##_name(Arg&& _value) \ + const->const std::decay_t& \ { \ using type = decltype(this->_name); \ this->_name = type{std::forward(_value)}; \ @@ -1126,8 +1126,8 @@ public: \ mutable _name{__VA_ARGS__}; \ \ template \ - auto with_##_name(Args &&... _value) \ - const->const std::decay_t & \ + auto with_##_name(Args&&... _value) \ + const->const std::decay_t& \ { \ using type = decltype(this->_name); \ this->_name = type{std::forward(_value)...}; \ diff --git a/include/ginkgo/core/base/machine_topology.hpp b/include/ginkgo/core/base/machine_topology.hpp index 509c7ada786..19dc726a2ed 100644 --- a/include/ginkgo/core/base/machine_topology.hpp +++ b/include/ginkgo/core/base/machine_topology.hpp @@ -89,7 +89,7 @@ namespace gko { */ class MachineTopology { template - using hwloc_manager = std::unique_ptr>; + using hwloc_manager = std::unique_ptr>; /** * This struct holds the attributes for a normal non-IO object. @@ -208,7 +208,7 @@ class MachineTopology { * * @return the MachineTopology instance */ - static MachineTopology *get_instance() + static MachineTopology* get_instance() { static MachineTopology instance; return &instance; @@ -226,7 +226,7 @@ class MachineTopology { * See hwloc doc for * [singlify](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00175.php#gaa611a77c092e679246afdf9a60d5db8b) */ - void bind_to_cores(const std::vector &ids, + void bind_to_cores(const std::vector& ids, const bool singlify = true) const { hwloc_binding_helper(this->cores_, ids, singlify); @@ -237,7 +237,7 @@ class MachineTopology { * * @param ids The ids of the core to be bound to the calling process. 
*/ - void bind_to_core(const int &id) const + void bind_to_core(const int& id) const { MachineTopology::get_instance()->bind_to_cores(std::vector{id}); } @@ -254,7 +254,7 @@ class MachineTopology { * See hwloc doc for * [singlify](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00175.php#gaa611a77c092e679246afdf9a60d5db8b) */ - void bind_to_pus(const std::vector &ids, + void bind_to_pus(const std::vector& ids, const bool singlify = true) const { hwloc_binding_helper(this->pus_, ids, singlify); @@ -265,7 +265,7 @@ class MachineTopology { * * @param ids The ids of PUs to be bound to the calling process. */ - void bind_to_pu(const int &id) const + void bind_to_pu(const int& id) const { MachineTopology::get_instance()->bind_to_pus(std::vector{id}); } @@ -276,7 +276,7 @@ class MachineTopology { * @param id The id of the PU * @return the PU object struct. */ - const normal_obj_info *get_pu(size_type id) const + const normal_obj_info* get_pu(size_type id) const { GKO_ENSURE_IN_BOUNDS(id, this->pus_.size()); return &this->pus_[id]; @@ -288,7 +288,7 @@ class MachineTopology { * @param id The id of the core * @return the core object struct. */ - const normal_obj_info *get_core(size_type id) const + const normal_obj_info* get_core(size_type id) const { GKO_ENSURE_IN_BOUNDS(id, this->cores_.size()); return &this->cores_[id]; @@ -300,7 +300,7 @@ class MachineTopology { * @param id The id of the pci device * @return the PCI object struct. */ - const io_obj_info *get_pci_device(size_type id) const + const io_obj_info* get_pci_device(size_type id) const { GKO_ENSURE_IN_BOUNDS(id, this->pci_devices_.size()); return &this->pci_devices_[id]; @@ -312,7 +312,7 @@ class MachineTopology { * @param pci_bus_id The PCI bus id of the pci device * @return the PCI object struct. 
*/ - const io_obj_info *get_pci_device(const std::string &pci_bus_id) const; + const io_obj_info* get_pci_device(const std::string& pci_bus_id) const; /** * Get the number of PU objects stored in this Topology tree. @@ -349,8 +349,8 @@ class MachineTopology { * object . */ void hwloc_binding_helper( - const std::vector &obj, - const std::vector &ids, const bool singlify = true) const; + const std::vector& obj, + const std::vector& ids, const bool singlify = true) const; /** * @internal @@ -361,7 +361,7 @@ class MachineTopology { * logical index with these functions */ void load_objects(hwloc_obj_type_t type, - std::vector &objects) const; + std::vector& objects) const; /** * @internal @@ -372,7 +372,7 @@ class MachineTopology { * logical index with these functions */ void load_objects(hwloc_obj_type_t type, - std::vector &vector) const; + std::vector& vector) const; /** * @@ -380,7 +380,7 @@ class MachineTopology { * * Get object id from the os index */ - int get_obj_id_by_os_index(const std::vector &objects, + int get_obj_id_by_os_index(const std::vector& objects, size_type os_index) const; /** @@ -389,7 +389,7 @@ class MachineTopology { * * Get object id from the hwloc index */ - int get_obj_id_by_gp_index(const std::vector &objects, + int get_obj_id_by_gp_index(const std::vector& objects, size_type gp_index) const; private: @@ -398,10 +398,10 @@ class MachineTopology { * be only one global object per execution. 
*/ MachineTopology(); - MachineTopology(MachineTopology &) = delete; - MachineTopology(MachineTopology &&) = delete; - MachineTopology &operator=(MachineTopology &) = delete; - MachineTopology &operator=(MachineTopology &&) = delete; + MachineTopology(MachineTopology&) = delete; + MachineTopology(MachineTopology&&) = delete; + MachineTopology& operator=(MachineTopology&) = delete; + MachineTopology& operator=(MachineTopology&&) = delete; ~MachineTopology() = default; std::vector pus_; diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp index 81f7349daa8..962aef4c852 100644 --- a/include/ginkgo/core/base/math.hpp +++ b/include/ginkgo/core/base/math.hpp @@ -622,7 +622,7 @@ GKO_INLINE __host__ constexpr T zero() * `zero(x)`. */ template -GKO_INLINE __host__ constexpr T zero(const T &) +GKO_INLINE __host__ constexpr T zero(const T&) { return zero(); } @@ -650,7 +650,7 @@ GKO_INLINE __host__ constexpr T one() * `one(x)`. */ template -GKO_INLINE __host__ constexpr T one(const T &) +GKO_INLINE __host__ constexpr T one(const T&) { return one(); } @@ -680,7 +680,7 @@ zero() * `zero(x)`. */ template -GKO_INLINE __device__ constexpr T zero(const T &) +GKO_INLINE __device__ constexpr T zero(const T&) { return zero(); } @@ -710,7 +710,7 @@ one() * `one(x)`. */ template -GKO_INLINE __device__ constexpr T one(const T &) +GKO_INLINE __device__ constexpr T one(const T&) { return one(); } @@ -741,7 +741,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr T zero() * `zero(x)`. */ template -GKO_INLINE GKO_ATTRIBUTES constexpr T zero(const T &) +GKO_INLINE GKO_ATTRIBUTES constexpr T zero(const T&) { return zero(); } @@ -769,7 +769,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr T one() * `one(x)`. 
*/ template -GKO_INLINE GKO_ATTRIBUTES constexpr T one(const T &) +GKO_INLINE GKO_ATTRIBUTES constexpr T one(const T&) { return one(); } @@ -793,7 +793,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr T one(const T &) * */ template -GKO_INLINE GKO_ATTRIBUTES constexpr T max(const T &x, const T &y) +GKO_INLINE GKO_ATTRIBUTES constexpr T max(const T& x, const T& y) { return x >= y ? x : y; } @@ -811,7 +811,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr T max(const T &x, const T &y) * */ template -GKO_INLINE GKO_ATTRIBUTES constexpr T min(const T &x, const T &y) +GKO_INLINE GKO_ATTRIBUTES constexpr T min(const T& x, const T& y) { return x <= y ? x : y; } @@ -828,7 +828,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr T min(const T &x, const T &y) */ template GKO_ATTRIBUTES GKO_INLINE constexpr std::enable_if_t::value, T> -real(const T &x) +real(const T& x) { return x; } @@ -836,7 +836,7 @@ real(const T &x) template GKO_ATTRIBUTES GKO_INLINE constexpr std::enable_if_t::value, remove_complex> -real(const T &x) +real(const T& x) { return x.real(); } @@ -853,7 +853,7 @@ real(const T &x) */ template GKO_ATTRIBUTES GKO_INLINE constexpr std::enable_if_t::value, T> -imag(const T &) +imag(const T&) { return zero(); } @@ -861,7 +861,7 @@ imag(const T &) template GKO_ATTRIBUTES GKO_INLINE constexpr std::enable_if_t::value, remove_complex> -imag(const T &x) +imag(const T& x) { return x.imag(); } @@ -876,14 +876,14 @@ imag(const T &x) */ template GKO_ATTRIBUTES GKO_INLINE std::enable_if_t::value, T> conj( - const T &x) + const T& x) { return x; } template GKO_ATTRIBUTES GKO_INLINE std::enable_if_t::value, T> conj( - const T &x) + const T& x) { return T{x.real(), -x.imag()}; } @@ -897,7 +897,7 @@ GKO_ATTRIBUTES GKO_INLINE std::enable_if_t::value, T> conj( * @return The squared norm of the object. 
*/ template -GKO_INLINE GKO_ATTRIBUTES constexpr auto squared_norm(const T &x) +GKO_INLINE GKO_ATTRIBUTES constexpr auto squared_norm(const T& x) -> decltype(real(conj(x) * x)) { return real(conj(x) * x); @@ -916,7 +916,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr auto squared_norm(const T &x) template GKO_INLINE GKO_ATTRIBUTES constexpr xstd::enable_if_t::value, T> - abs(const T &x) + abs(const T& x) { return x >= zero() ? x : -x; } @@ -925,7 +925,7 @@ GKO_INLINE template GKO_INLINE GKO_ATTRIBUTES constexpr xstd::enable_if_t::value, remove_complex> -abs(const T &x) +abs(const T& x) { return sqrt(squared_norm(x)); } @@ -944,7 +944,7 @@ abs(const T &x) * @return maximum of `hint` and the significant bit position of `n` */ template -constexpr uint32 get_significant_bit(const T &n, uint32 hint = 0u) noexcept +constexpr uint32 get_significant_bit(const T& n, uint32 hint = 0u) noexcept { return (T{1} << (hint + 1)) > n ? hint : get_significant_bit(n, hint + 1u); } @@ -962,8 +962,8 @@ constexpr uint32 get_significant_bit(const T &n, uint32 hint = 0u) noexcept * @return the smallest power of `base` not smaller than `limit` */ template -constexpr T get_superior_power(const T &base, const T &limit, - const T &hint = T{1}) noexcept +constexpr T get_superior_power(const T& base, const T& limit, + const T& hint = T{1}) noexcept { return hint >= limit ? 
hint : get_superior_power(base, limit, hint * base); } @@ -982,7 +982,7 @@ constexpr T get_superior_power(const T &base, const T &limit, */ template GKO_INLINE GKO_ATTRIBUTES std::enable_if_t::value, bool> -is_finite(const T &value) +is_finite(const T& value) { constexpr T infinity{detail::infinity_impl::value}; return abs(value) < infinity; @@ -1002,7 +1002,7 @@ is_finite(const T &value) */ template GKO_INLINE GKO_ATTRIBUTES std::enable_if_t::value, bool> -is_finite(const T &value) +is_finite(const T& value) { return is_finite(value.real()) && is_finite(value.imag()); } diff --git a/include/ginkgo/core/base/matrix_data.hpp b/include/ginkgo/core/base/matrix_data.hpp index 74b85e4140e..fe51189f0a5 100644 --- a/include/ginkgo/core/base/matrix_data.hpp +++ b/include/ginkgo/core/base/matrix_data.hpp @@ -65,7 +65,7 @@ struct input_triple { template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(Distribution&& dist, Generator&& gen) { return dist(gen); } @@ -73,7 +73,7 @@ get_rand_value(Distribution &&dist, Generator &&gen) template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(Distribution&& dist, Generator&& gen) { return ValueType(dist(gen), dist(gen)); } @@ -115,7 +115,7 @@ struct matrix_data { {} #define GKO_DEFINE_DEFAULT_COMPARE_OPERATOR(_op) \ - bool operator _op(const nonzero_type &other) const \ + bool operator _op(const nonzero_type& other) const \ { \ return std::tie(this->row, this->column, this->value) \ _op std::tie(other.row, other.column, other.value); \ @@ -165,7 +165,7 @@ struct matrix_data { * @param engine random engine used to generate random values */ template - matrix_data(dim<2> size_, RandomDistribution &&dist, RandomEngine &&engine) + matrix_data(dim<2> size_, RandomDistribution&& dist, RandomEngine&& engine) : size{size_} { for (size_type row = 0; row < size[0]; ++row) { @@ -191,7 +191,7 @@ struct 
matrix_data { const auto row_data = begin(values)[row]; size[1] = std::max(size[1], row_data.size()); for (size_type col = 0; col < row_data.size(); ++col) { - const auto &val = begin(row_data)[col]; + const auto& val = begin(row_data)[col]; if (val != zero()) { nonzeros.emplace_back(row, col, val); } @@ -212,7 +212,7 @@ struct matrix_data { : size{size_}, nonzeros() { nonzeros.reserve(nonzeros_.size()); - for (const auto &elem : nonzeros_) { + for (const auto& elem : nonzeros_) { nonzeros.emplace_back(elem.row, elem.col, elem.val); } } @@ -223,13 +223,13 @@ struct matrix_data { * @param size size of the block-matrix (in blocks) * @param diag_block matrix block used to fill the complete matrix */ - matrix_data(dim<2> size_, const matrix_data &block) + matrix_data(dim<2> size_, const matrix_data& block) : size{size_ * block.size} { nonzeros.reserve(size_[0] * size_[1] * block.nonzeros.size()); for (size_type row = 0; row < size_[0]; ++row) { for (size_type col = 0; col < size_[1]; ++col) { - for (const auto &elem : block.nonzeros) { + for (const auto& elem : block.nonzeros) { nonzeros.emplace_back(row * block.size[0] + elem.row, col * block.size[1] + elem.column, elem.value); @@ -247,7 +247,7 @@ struct matrix_data { * @param data range used to initialize the matrix */ template - matrix_data(const range &data) + matrix_data(const range& data) : size{data.length(0), data.length(1)} { for (gko::size_type row = 0; row < size[0]; ++row) { @@ -309,13 +309,13 @@ struct matrix_data { * * @return the block-diagonal matrix */ - static matrix_data diag(dim<2> size_, const matrix_data &block) + static matrix_data diag(dim<2> size_, const matrix_data& block) { matrix_data res(size_ * block.size); const auto num_blocks = std::min(size_[0], size_[1]); res.nonzeros.reserve(num_blocks * block.nonzeros.size()); for (size_type b = 0; b < num_blocks; ++b) { - for (const auto &elem : block.nonzeros) { + for (const auto& elem : block.nonzeros) { res.nonzeros.emplace_back(b * 
block.size[0] + elem.row, b * block.size[1] + elem.column, elem.value); @@ -339,14 +339,14 @@ struct matrix_data { static matrix_data diag(ForwardIterator begin, ForwardIterator end) { matrix_data res(std::accumulate( - begin, end, dim<2>{}, [](dim<2> s, const matrix_data &d) { + begin, end, dim<2>{}, [](dim<2> s, const matrix_data& d) { return dim<2>{s[0] + d.size[0], s[1] + d.size[1]}; })); size_type row_offset{}; size_type col_offset{}; for (auto it = begin; it != end; ++it) { - for (const auto &elem : it->nonzeros) { + for (const auto& elem : it->nonzeros) { res.nonzeros.emplace_back(row_offset + elem.row, col_offset + elem.column, elem.value); } @@ -392,7 +392,7 @@ struct matrix_data { template static matrix_data cond(size_type size, remove_complex condition_number, - RandomDistribution &&dist, RandomEngine &&engine, + RandomDistribution&& dist, RandomEngine&& engine, size_type num_reflectors) { using range = range>; @@ -436,7 +436,7 @@ struct matrix_data { template static matrix_data cond(size_type size, remove_complex condition_number, - RandomDistribution &&dist, RandomEngine &&engine) + RandomDistribution&& dist, RandomEngine&& engine) { return cond(size, condition_number, std::forward(dist), @@ -472,7 +472,7 @@ struct matrix_data { template static void initialize_diag_with_cond( remove_complex condition_number, - const range &matrix) + const range& matrix) { using sigma_type = remove_complex; const auto size = matrix.length(0); @@ -490,9 +490,9 @@ struct matrix_data { template - static void generate_random_reflector(RandomDistribution &&dist, - RandomEngine &&engine, - const range &reflector) + static void generate_random_reflector(RandomDistribution&& dist, + RandomEngine&& engine, + const range& reflector) { for (gko::size_type i = 0; i < reflector.length(0); ++i) { reflector(i, 0) = detail::get_rand_value(dist, engine); @@ -500,9 +500,9 @@ struct matrix_data { } template - static void reflect_domain(const range &reflector, - const range &matrix, - 
ValueType *work_data) + static void reflect_domain(const range& reflector, + const range& matrix, + ValueType* work_data) { const auto two = one() + one(); range> work(work_data, @@ -514,9 +514,9 @@ struct matrix_data { } template - static void reflect_range(const range &reflector, - const range &matrix, - ValueType *work_data) + static void reflect_range(const range& reflector, + const range& matrix, + ValueType* work_data) { const auto two = one() + one(); range> work( diff --git a/include/ginkgo/core/base/mtx_io.hpp b/include/ginkgo/core/base/mtx_io.hpp index 3fde2de108d..fc52bf984a8 100644 --- a/include/ginkgo/core/base/mtx_io.hpp +++ b/include/ginkgo/core/base/mtx_io.hpp @@ -58,7 +58,7 @@ namespace gko { * structure. Consider using gko::read instead. */ template -matrix_data read_raw(std::istream &is); +matrix_data read_raw(std::istream& is); /** @@ -91,7 +91,7 @@ enum class layout_type { * gko::write instead. */ template -void write_raw(std::ostream &os, const matrix_data &data, +void write_raw(std::ostream& os, const matrix_data& data, layout_type layout = layout_type::array); @@ -110,7 +110,7 @@ void write_raw(std::ostream &os, const matrix_data &data, * @return A MatrixType LinOp filled with data from filename */ template -inline std::unique_ptr read(StreamType &&is, MatrixArgs &&... args) +inline std::unique_ptr read(StreamType&& is, MatrixArgs&&... args) { auto mtx = MatrixType::create(std::forward(args)...); mtx->read(read_raw read(StreamType &&is, MatrixArgs &&... 
args) * @param layout the layout used in the output */ template -inline void write(StreamType &&os, MatrixType *matrix, +inline void write(StreamType&& os, MatrixType* matrix, layout_type layout = layout_type::array) { matrix_data( + std::unique_ptr( abi::__cxa_demangle(tinfo.name(), nullptr, nullptr, &status), std::free) .get()); @@ -82,7 +82,7 @@ inline std::string get_type_name(const std::type_info &tinfo) * @param unused */ template -std::string get_static_type(const T &) +std::string get_static_type(const T&) { return get_type_name(typeid(T)); } @@ -97,7 +97,7 @@ std::string get_static_type(const T &) * @param t the object we get the dynamic type of */ template -std::string get_dynamic_type(const T &t) +std::string get_dynamic_type(const T& t) { return get_type_name(typeid(t)); } @@ -107,7 +107,7 @@ namespace detail { template -std::string get_enclosing_scope(const T &) +std::string get_enclosing_scope(const T&) { auto name = get_type_name(typeid(T)); auto found = name.rfind(':'); diff --git a/include/ginkgo/core/base/perturbation.hpp b/include/ginkgo/core/base/perturbation.hpp index 219c29d9ad1..0048ae6e16e 100644 --- a/include/ginkgo/core/base/perturbation.hpp +++ b/include/ginkgo/core/base/perturbation.hpp @@ -149,10 +149,10 @@ class Perturbation : public EnableLinOp>, this->validate_perturbation(); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; /** * Validates the dimensions of the `scalar`, `basis` and `projector` @@ -175,8 +175,8 @@ class Perturbation : public EnableLinOp>, mutable struct cache_struct { cache_struct() = default; ~cache_struct() = default; - cache_struct(const cache_struct &other) {} - cache_struct &operator=(const cache_struct &other) { return *this; } + 
cache_struct(const cache_struct& other) {} + cache_struct& operator=(const cache_struct& other) { return *this; } // allocate linops of cache. The dimenstion of `intermediate` is // (the number of rows of projector, the number of columns of b). Others diff --git a/include/ginkgo/core/base/polymorphic_object.hpp b/include/ginkgo/core/base/polymorphic_object.hpp index e9d09e5d636..71741479211 100644 --- a/include/ginkgo/core/base/polymorphic_object.hpp +++ b/include/ginkgo/core/base/polymorphic_object.hpp @@ -77,7 +77,7 @@ class PolymorphicObject : public log::EnableLogging { } // preserve the executor of the object - PolymorphicObject &operator=(const PolymorphicObject &) { return *this; } + PolymorphicObject& operator=(const PolymorphicObject&) { return *this; } /** * Creates a new "default" object of the same dynamic type as this object. @@ -155,7 +155,7 @@ class PolymorphicObject : public log::EnableLogging { * * @return this */ - PolymorphicObject *copy_from(const PolymorphicObject *other) + PolymorphicObject* copy_from(const PolymorphicObject* other) { this->template log( exec_.get(), other, this); @@ -176,7 +176,7 @@ class PolymorphicObject : public log::EnableLogging { * * @return this */ - PolymorphicObject *copy_from(std::unique_ptr other) + PolymorphicObject* copy_from(std::unique_ptr other) { this->template log( exec_.get(), other.get(), this); @@ -195,7 +195,7 @@ class PolymorphicObject : public log::EnableLogging { * * @return this */ - PolymorphicObject *clear() { return this->clear_impl(); } + PolymorphicObject* clear() { return this->clear_impl(); } /** * Returns the Executor of the object. 
@@ -222,7 +222,7 @@ class PolymorphicObject : public log::EnableLogging { {} // preserve the executor of the object - explicit PolymorphicObject(const PolymorphicObject &other) + explicit PolymorphicObject(const PolymorphicObject& other) { *this = other; } @@ -246,8 +246,8 @@ class PolymorphicObject : public log::EnableLogging { * * @return this */ - virtual PolymorphicObject *copy_from_impl( - const PolymorphicObject *other) = 0; + virtual PolymorphicObject* copy_from_impl( + const PolymorphicObject* other) = 0; /** * Implementers of PolymorphicObject should implement this function instead @@ -257,7 +257,7 @@ class PolymorphicObject : public log::EnableLogging { * * @return this */ - virtual PolymorphicObject *copy_from_impl( + virtual PolymorphicObject* copy_from_impl( std::unique_ptr other) = 0; /** @@ -266,7 +266,7 @@ class PolymorphicObject : public log::EnableLogging { * * @return this */ - virtual PolymorphicObject *clear_impl() = 0; + virtual PolymorphicObject* clear_impl() = 0; private: std::shared_ptr exec_; @@ -299,7 +299,7 @@ class EnableAbstractPolymorphicObject : public PolymorphicBase { std::unique_ptr create_default( std::shared_ptr exec) const { - return std::unique_ptr{static_cast( + return std::unique_ptr{static_cast( this->create_default_impl(std::move(exec)).release())}; } @@ -321,20 +321,20 @@ class EnableAbstractPolymorphicObject : public PolymorphicBase { return this->clone(this->get_executor()); } - AbstractObject *copy_from(const PolymorphicObject *other) + AbstractObject* copy_from(const PolymorphicObject* other) { - return static_cast(this->copy_from_impl(other)); + return static_cast(this->copy_from_impl(other)); } - AbstractObject *copy_from(std::unique_ptr other) + AbstractObject* copy_from(std::unique_ptr other) { - return static_cast( + return static_cast( this->copy_from_impl(std::move(other))); } - AbstractObject *clear() + AbstractObject* clear() { - return static_cast(this->clear_impl()); + return 
static_cast(this->clear_impl()); } }; @@ -347,12 +347,12 @@ class EnableAbstractPolymorphicObject : public PolymorphicBase { * implementing mixins which depend on the type of the affected object, in which * case the type is set to the affected object (i.e. the CRTP parameter). */ -#define GKO_ENABLE_SELF(_type) \ - _type *self() noexcept { return static_cast<_type *>(this); } \ - \ - const _type *self() const noexcept \ - { \ - return static_cast(this); \ +#define GKO_ENABLE_SELF(_type) \ + _type* self() noexcept { return static_cast<_type*>(this); } \ + \ + const _type* self() const noexcept \ + { \ + return static_cast(this); \ } @@ -398,7 +398,7 @@ class ConvertibleTo { * * @param result the object used to store the result of the conversion */ - virtual void convert_to(result_type *result) const = 0; + virtual void convert_to(result_type* result) const = 0; /** * Converts the implementer to an object of type result_type by moving data @@ -414,7 +414,7 @@ class ConvertibleTo { * optimized by exploiting the fact that implementer's data can be * moved to the result. 
*/ - virtual void move_to(result_type *result) = 0; + virtual void move_to(result_type* result) = 0; }; @@ -422,12 +422,12 @@ namespace detail { template -std::unique_ptr> copy_and_convert_to_impl( - std::shared_ptr exec, T *obj) +std::unique_ptr> copy_and_convert_to_impl( + std::shared_ptr exec, T* obj) { - auto obj_as_r = dynamic_cast(obj); + auto obj_as_r = dynamic_cast(obj); if (obj_as_r != nullptr && obj->get_executor() == exec) { - return {obj_as_r, [](R *) {}}; + return {obj_as_r, [](R*) {}}; } else { auto copy = R::create(exec); as>>(obj)->convert_to(lend(copy)); @@ -471,8 +471,8 @@ std::shared_ptr copy_and_convert_to_impl( * object */ template -std::unique_ptr> copy_and_convert_to( - std::shared_ptr exec, T *obj) +std::unique_ptr> copy_and_convert_to( + std::shared_ptr exec, T* obj) { return detail::copy_and_convert_to_impl(std::move(exec), obj); } @@ -485,8 +485,8 @@ std::unique_ptr> copy_and_convert_to( * result if the input had the same qualifier. */ template -std::unique_ptr> copy_and_convert_to( - std::shared_ptr exec, const T *obj) +std::unique_ptr> copy_and_convert_to( + std::shared_ptr exec, const T* obj) { return detail::copy_and_convert_to_impl(std::move(exec), obj); } @@ -579,20 +579,20 @@ class EnablePolymorphicObject return std::unique_ptr{new ConcreteObject(exec)}; } - PolymorphicObject *copy_from_impl(const PolymorphicObject *other) override + PolymorphicObject* copy_from_impl(const PolymorphicObject* other) override { as>(other)->convert_to(self()); return this; } - PolymorphicObject *copy_from_impl( + PolymorphicObject* copy_from_impl( std::unique_ptr other) override { as>(other.get())->move_to(self()); return this; } - PolymorphicObject *clear_impl() override + PolymorphicObject* clear_impl() override { *self() = ConcreteObject{this->get_executor()}; return this; @@ -620,9 +620,9 @@ class EnablePolymorphicAssignment : public ConvertibleTo { public: using result_type = ResultType; - void convert_to(result_type *result) const override { 
*result = *self(); } + void convert_to(result_type* result) const override { *result = *self(); } - void move_to(result_type *result) override { *result = std::move(*self()); } + void move_to(result_type* result) override { *result = std::move(*self()); } private: GKO_ENABLE_SELF(ConcreteType); @@ -641,7 +641,7 @@ template class EnableCreateMethod { public: template - static std::unique_ptr create(Args &&... args) + static std::unique_ptr create(Args&&... args) { return std::unique_ptr( new ConcreteType(std::forward(args)...)); diff --git a/include/ginkgo/core/base/precision_dispatch.hpp b/include/ginkgo/core/base/precision_dispatch.hpp index 376f126f5c5..606dcc6115e 100644 --- a/include/ginkgo/core/base/precision_dispatch.hpp +++ b/include/ginkgo/core/base/precision_dispatch.hpp @@ -68,7 +68,7 @@ namespace gko { */ template detail::temporary_conversion> -make_temporary_conversion(LinOp *matrix) +make_temporary_conversion(LinOp* matrix) { auto result = detail::temporary_conversion>::template create< @@ -83,7 +83,7 @@ make_temporary_conversion(LinOp *matrix) /** @copydoc make_temporary_conversion(LinOp*) */ template detail::temporary_conversion> -make_temporary_conversion(const LinOp *matrix) +make_temporary_conversion(const LinOp* matrix) { auto result = detail::temporary_conversion>:: template create>>(matrix); @@ -109,7 +109,7 @@ make_temporary_conversion(const LinOp *matrix) * @tparam Args the argument type list. * */ template -void precision_dispatch(Function fn, Args *... linops) +void precision_dispatch(Function fn, Args*... linops) { fn(make_temporary_conversion(linops).get()...); } @@ -125,7 +125,7 @@ void precision_dispatch(Function fn, Args *... linops) * @see precision_dispatch() */ template -void precision_dispatch_real_complex(Function fn, const LinOp *in, LinOp *out) +void precision_dispatch_real_complex(Function fn, const LinOp* in, LinOp* out) { // do we need to convert complex Dense to real Dense? 
// all real dense vectors are intra-convertible, thus by casting to @@ -133,7 +133,7 @@ void precision_dispatch_real_complex(Function fn, const LinOp *in, LinOp *out) // dense matrix: auto complex_to_real = !(is_complex() || - dynamic_cast> *>(in)); + dynamic_cast>*>(in)); if (complex_to_real) { auto dense_in = make_temporary_conversion>(in); auto dense_out = make_temporary_conversion>(out); @@ -141,8 +141,8 @@ void precision_dispatch_real_complex(Function fn, const LinOp *in, LinOp *out) // These dynamic_casts are only needed to make the code compile // If ValueType is complex, this branch will never be taken // If ValueType is real, the cast is a no-op - fn(dynamic_cast(dense_in->create_real_view().get()), - dynamic_cast(dense_out->create_real_view().get())); + fn(dynamic_cast(dense_in->create_real_view().get()), + dynamic_cast(dense_out->create_real_view().get())); } else { precision_dispatch(fn, in, out); } @@ -159,8 +159,8 @@ void precision_dispatch_real_complex(Function fn, const LinOp *in, LinOp *out) * @see precision_dispatch() */ template -void precision_dispatch_real_complex(Function fn, const LinOp *alpha, - const LinOp *in, LinOp *out) +void precision_dispatch_real_complex(Function fn, const LinOp* alpha, + const LinOp* in, LinOp* out) { // do we need to convert complex Dense to real Dense? 
// all real dense vectors are intra-convertible, thus by casting to @@ -168,7 +168,7 @@ void precision_dispatch_real_complex(Function fn, const LinOp *alpha, // dense matrix: auto complex_to_real = !(is_complex() || - dynamic_cast> *>(in)); + dynamic_cast>*>(in)); if (complex_to_real) { auto dense_in = make_temporary_conversion>(in); auto dense_out = make_temporary_conversion>(out); @@ -178,8 +178,8 @@ void precision_dispatch_real_complex(Function fn, const LinOp *alpha, // If ValueType is complex, this branch will never be taken // If ValueType is real, the cast is a no-op fn(dense_alpha.get(), - dynamic_cast(dense_in->create_real_view().get()), - dynamic_cast(dense_out->create_real_view().get())); + dynamic_cast(dense_in->create_real_view().get()), + dynamic_cast(dense_out->create_real_view().get())); } else { precision_dispatch(fn, alpha, in, out); } @@ -196,9 +196,9 @@ void precision_dispatch_real_complex(Function fn, const LinOp *alpha, * @see precision_dispatch() */ template -void precision_dispatch_real_complex(Function fn, const LinOp *alpha, - const LinOp *in, const LinOp *beta, - LinOp *out) +void precision_dispatch_real_complex(Function fn, const LinOp* alpha, + const LinOp* in, const LinOp* beta, + LinOp* out) { // do we need to convert complex Dense to real Dense? 
// all real dense vectors are intra-convertible, thus by casting to @@ -206,7 +206,7 @@ void precision_dispatch_real_complex(Function fn, const LinOp *alpha, // dense matrix: auto complex_to_real = !(is_complex() || - dynamic_cast> *>(in)); + dynamic_cast>*>(in)); if (complex_to_real) { auto dense_in = make_temporary_conversion>(in); auto dense_out = make_temporary_conversion>(out); @@ -217,9 +217,9 @@ void precision_dispatch_real_complex(Function fn, const LinOp *alpha, // If ValueType is complex, this branch will never be taken // If ValueType is real, the cast is a no-op fn(dense_alpha.get(), - dynamic_cast(dense_in->create_real_view().get()), + dynamic_cast(dense_in->create_real_view().get()), dense_beta.get(), - dynamic_cast(dense_out->create_real_view().get())); + dynamic_cast(dense_out->create_real_view().get())); } else { precision_dispatch(fn, alpha, in, beta, out); } @@ -256,23 +256,23 @@ void precision_dispatch_real_complex(Function fn, const LinOp *alpha, * with the converted arguments. 
*/ template -void mixed_precision_dispatch(Function fn, const LinOp *in, LinOp *out) +void mixed_precision_dispatch(Function fn, const LinOp* in, LinOp* out) { #ifdef GINKGO_MIXED_PRECISION using fst_type = matrix::Dense; using snd_type = matrix::Dense>; - if (auto dense_in = dynamic_cast(in)) { - if (auto dense_out = dynamic_cast(out)) { + if (auto dense_in = dynamic_cast(in)) { + if (auto dense_out = dynamic_cast(out)) { fn(dense_in, dense_out); - } else if (auto dense_out = dynamic_cast(out)) { + } else if (auto dense_out = dynamic_cast(out)) { fn(dense_in, dense_out); } else { GKO_NOT_SUPPORTED(out); } - } else if (auto dense_in = dynamic_cast(in)) { - if (auto dense_out = dynamic_cast(out)) { + } else if (auto dense_in = dynamic_cast(in)) { + if (auto dense_out = dynamic_cast(out)) { fn(dense_in, dense_out); - } else if (auto dense_out = dynamic_cast(out)) { + } else if (auto dense_out = dynamic_cast(out)) { fn(dense_in, dense_out); } else { GKO_NOT_SUPPORTED(out); @@ -296,9 +296,9 @@ void mixed_precision_dispatch(Function fn, const LinOp *in, LinOp *out) * @see mixed_precision_dispatch() */ template ()> * = nullptr> -void mixed_precision_dispatch_real_complex(Function fn, const LinOp *in, - LinOp *out) + std::enable_if_t()>* = nullptr> +void mixed_precision_dispatch_real_complex(Function fn, const LinOp* in, + LinOp* out) { #ifdef GINKGO_MIXED_PRECISION mixed_precision_dispatch(fn, in, out); @@ -309,12 +309,12 @@ void mixed_precision_dispatch_real_complex(Function fn, const LinOp *in, template ()> * = nullptr> -void mixed_precision_dispatch_real_complex(Function fn, const LinOp *in, - LinOp *out) + std::enable_if_t()>* = nullptr> +void mixed_precision_dispatch_real_complex(Function fn, const LinOp* in, + LinOp* out) { #ifdef GINKGO_MIXED_PRECISION - if (!dynamic_cast> *>(in)) { + if (!dynamic_cast>*>(in)) { mixed_precision_dispatch>( [&fn](auto dense_in, auto dense_out) { fn(dense_in->create_real_view().get(), diff --git a/include/ginkgo/core/base/range.hpp 
b/include/ginkgo/core/base/range.hpp index 3942a1d218d..215229762b9 100644 --- a/include/ginkgo/core/base/range.hpp +++ b/include/ginkgo/core/base/range.hpp @@ -124,43 +124,43 @@ struct span { }; -GKO_ATTRIBUTES GKO_INLINE constexpr bool operator<(const span &first, - const span &second) +GKO_ATTRIBUTES GKO_INLINE constexpr bool operator<(const span& first, + const span& second) { return first.end < second.begin; } -GKO_ATTRIBUTES GKO_INLINE constexpr bool operator<=(const span &first, - const span &second) +GKO_ATTRIBUTES GKO_INLINE constexpr bool operator<=(const span& first, + const span& second) { return first.end <= second.begin; } -GKO_ATTRIBUTES GKO_INLINE constexpr bool operator>(const span &first, - const span &second) +GKO_ATTRIBUTES GKO_INLINE constexpr bool operator>(const span& first, + const span& second) { return second < first; } -GKO_ATTRIBUTES GKO_INLINE constexpr bool operator>=(const span &first, - const span &second) +GKO_ATTRIBUTES GKO_INLINE constexpr bool operator>=(const span& first, + const span& second) { return second <= first; } -GKO_ATTRIBUTES GKO_INLINE constexpr bool operator==(const span &first, - const span &second) +GKO_ATTRIBUTES GKO_INLINE constexpr bool operator==(const span& first, + const span& second) { return first.begin == second.begin && first.end == second.end; } -GKO_ATTRIBUTES GKO_INLINE constexpr bool operator!=(const span &first, - const span &second) +GKO_ATTRIBUTES GKO_INLINE constexpr bool operator!=(const span& first, + const span& second) { return !(first == second); } @@ -175,7 +175,7 @@ GKO_ATTRIBUTES constexpr GKO_INLINE std::enable_if_t<(CurrentDimension >= max(FirstRange::dimensionality, SecondRange::dimensionality)), bool> - equal_dimensions(const FirstRange &, const SecondRange &) + equal_dimensions(const FirstRange&, const SecondRange&) { return true; } @@ -186,7 +186,7 @@ GKO_ATTRIBUTES constexpr GKO_INLINE std::enable_if_t<(CurrentDimension < max(FirstRange::dimensionality, 
SecondRange::dimensionality)), bool> - equal_dimensions(const FirstRange &first, const SecondRange &second) + equal_dimensions(const FirstRange& first, const SecondRange& second) { return first.length(CurrentDimension) == second.length(CurrentDimension) && equal_dimensions(first, second); @@ -332,7 +332,7 @@ class range { * @param params parameters forwarded to Accessor constructor. */ template - GKO_ATTRIBUTES constexpr explicit range(AccessorParams &&... params) + GKO_ATTRIBUTES constexpr explicit range(AccessorParams&&... params) : accessor_{std::forward(params)...} {} @@ -349,7 +349,7 @@ class range { * @return a value on position `(dimensions...)`. */ template - GKO_ATTRIBUTES constexpr auto operator()(DimensionTypes &&... dimensions) + GKO_ATTRIBUTES constexpr auto operator()(DimensionTypes&&... dimensions) const -> decltype(std::declval()( std::forward(dimensions)...)) { @@ -367,8 +367,8 @@ class range { * @tparam OtherAccessor accessor of the other range */ template - GKO_ATTRIBUTES const range &operator=( - const range &other) const + GKO_ATTRIBUTES const range& operator=( + const range& other) const { GKO_ASSERT(detail::equal_dimensions(*this, other)); accessor_.copy_from(other); @@ -388,14 +388,14 @@ class range { * * @param other the range to copy the data from */ - GKO_ATTRIBUTES const range &operator=(const range &other) const + GKO_ATTRIBUTES const range& operator=(const range& other) const { GKO_ASSERT(detail::equal_dimensions(*this, other)); accessor_.copy_from(other.get_accessor()); return *this; } - range(const range &other) = default; + range(const range& other) = default; /** * Returns the length of the specified dimension of the range. 
@@ -416,7 +416,7 @@ class range { * * @return pointer to the accessor */ - GKO_ATTRIBUTES constexpr const accessor *operator->() const noexcept + GKO_ATTRIBUTES constexpr const accessor* operator->() const noexcept { return &accessor_; } @@ -426,7 +426,7 @@ class range { * * @return reference to the accessor */ - GKO_ATTRIBUTES constexpr const accessor &get_accessor() const noexcept + GKO_ATTRIBUTES constexpr const accessor& get_accessor() const noexcept { return accessor_; } @@ -453,13 +453,13 @@ struct implement_unary_operation { static constexpr size_type dimensionality = accessor::dimensionality; GKO_ATTRIBUTES constexpr explicit implement_unary_operation( - const Accessor &operand) + const Accessor& operand) : operand{operand} {} template GKO_ATTRIBUTES constexpr auto operator()( - const DimensionTypes &... dimensions) const + const DimensionTypes&... dimensions) const -> decltype(Operation::evaluate(std::declval(), dimensions...)) { @@ -472,7 +472,7 @@ struct implement_unary_operation { } template - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const = delete; + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const = delete; const accessor operand; }; @@ -493,7 +493,7 @@ struct implement_binary_operation GKO_ATTRIBUTES constexpr auto operator()( - const DimensionTypes &... dimensions) const + const DimensionTypes&... dimensions) const -> decltype(Operation::evaluate_range_by_range( std::declval(), std::declval(), dimensions...)) @@ -515,7 +515,7 @@ struct implement_binary_operation - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const = delete; + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const = delete; const first_accessor first; const second_accessor second; @@ -528,13 +528,13 @@ struct implement_binary_operation GKO_ATTRIBUTES constexpr auto operator()( - const DimensionTypes &... dimensions) const + const DimensionTypes&... 
dimensions) const -> decltype(Operation::evaluate_scalar_by_range( std::declval(), std::declval(), dimensions...)) @@ -549,7 +549,7 @@ struct implement_binary_operation - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const = delete; + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const = delete; const FirstOperand first; const second_accessor second; @@ -562,13 +562,13 @@ struct implement_binary_operation GKO_ATTRIBUTES constexpr auto operator()( - const DimensionTypes &... dimensions) const + const DimensionTypes&... dimensions) const -> decltype(Operation::evaluate_range_by_scalar( std::declval(), std::declval(), dimensions...)) @@ -583,7 +583,7 @@ struct implement_binary_operation - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const = delete; + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const = delete; const first_accessor first; const SecondOperand second; @@ -612,7 +612,7 @@ struct implement_binary_operation \ GKO_ATTRIBUTES constexpr GKO_INLINE \ range> \ - _operator_name(const range &operand) \ + _operator_name(const range& operand) \ { \ return range>( \ operand.get_accessor()); \ @@ -622,25 +622,24 @@ struct implement_binary_operation \ - GKO_ATTRIBUTES static constexpr auto simple_evaluate_impl( \ - const Operand &operand) -> decltype(__VA_ARGS__) \ - { \ - return __VA_ARGS__; \ - } \ - \ - public: \ - template \ - GKO_ATTRIBUTES static constexpr auto evaluate( \ - const AccessorType &accessor, \ - const DimensionTypes &... dimensions) \ - -> decltype(simple_evaluate_impl(accessor(dimensions...))) \ - { \ - return simple_evaluate_impl(accessor(dimensions...)); \ - } \ +#define GKO_DEFINE_SIMPLE_UNARY_OPERATION(_name, ...) 
\ + struct _name { \ + private: \ + template \ + GKO_ATTRIBUTES static constexpr auto simple_evaluate_impl( \ + const Operand& operand) -> decltype(__VA_ARGS__) \ + { \ + return __VA_ARGS__; \ + } \ + \ + public: \ + template \ + GKO_ATTRIBUTES static constexpr auto evaluate( \ + const AccessorType& accessor, const DimensionTypes&... dimensions) \ + -> decltype(simple_evaluate_impl(accessor(dimensions...))) \ + { \ + return simple_evaluate_impl(accessor(dimensions...)); \ + } \ } @@ -711,16 +710,16 @@ struct transpose_operation { static constexpr size_type dimensionality = accessor::dimensionality; GKO_ATTRIBUTES constexpr explicit transpose_operation( - const Accessor &operand) + const Accessor& operand) : operand{operand} {} template GKO_ATTRIBUTES constexpr auto operator()( - const FirstDimensionType &first_dim, - const SecondDimensionType &second_dim, - const DimensionTypes &... dims) const + const FirstDimensionType& first_dim, + const SecondDimensionType& second_dim, + const DimensionTypes&... 
dims) const -> decltype(std::declval()(second_dim, first_dim, dims...)) { return operand(second_dim, first_dim, dims...); @@ -733,7 +732,7 @@ struct transpose_operation { } template - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const = delete; + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const = delete; const accessor operand; }; @@ -772,8 +771,8 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); template \ GKO_ATTRIBUTES constexpr GKO_INLINE range> \ - _operator_name(const range &first, \ - const range &second) \ + _operator_name(const range& first, \ + const range& second) \ { \ return range> \ - _operator_name(const range &first, \ - const range &second) \ + _operator_name(const range& first, \ + const range& second) \ { \ return range> \ - _operator_name(const range &first, \ - const SecondOperand &second) \ + _operator_name(const range& first, \ + const SecondOperand& second) \ { \ return range> \ - _operator_name(const FirstOperand &first, \ - const range &second) \ + _operator_name(const FirstOperand& first, \ + const range& second) \ { \ return range \ GKO_ATTRIBUTES constexpr static auto simple_evaluate_impl( \ - const FirstOperand &first, const SecondOperand &second) \ + const FirstOperand& first, const SecondOperand& second) \ -> decltype(__VA_ARGS__) \ { \ return __VA_ARGS__; \ @@ -835,8 +834,8 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); template \ GKO_ATTRIBUTES static constexpr auto evaluate_range_by_range( \ - const FirstAccessor &first, const SecondAccessor &second, \ - const DimensionTypes &... dims) \ + const FirstAccessor& first, const SecondAccessor& second, \ + const DimensionTypes&... 
dims) \ -> decltype(simple_evaluate_impl(first(dims...), second(dims...))) \ { \ return simple_evaluate_impl(first(dims...), second(dims...)); \ @@ -845,8 +844,8 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); template \ GKO_ATTRIBUTES static constexpr auto evaluate_scalar_by_range( \ - const FirstOperand &first, const SecondAccessor &second, \ - const DimensionTypes &... dims) \ + const FirstOperand& first, const SecondAccessor& second, \ + const DimensionTypes&... dims) \ -> decltype(simple_evaluate_impl(first, second(dims...))) \ { \ return simple_evaluate_impl(first, second(dims...)); \ @@ -855,8 +854,8 @@ GKO_BIND_UNARY_RANGE_OPERATION_TO_OPERATOR(transpose_operation, transpose); template \ GKO_ATTRIBUTES static constexpr auto evaluate_range_by_scalar( \ - const FirstAccessor &first, const SecondOperand &second, \ - const DimensionTypes &... dims) \ + const FirstAccessor& first, const SecondOperand& second, \ + const DimensionTypes&... dims) \ -> decltype(simple_evaluate_impl(first(dims...), second)) \ { \ return simple_evaluate_impl(first(dims...), second); \ @@ -871,7 +870,7 @@ namespace detail { // binary arithmetic GKO_DEFINE_SIMPLE_BINARY_OPERATION(add, first + second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(sub, first - second); -GKO_DEFINE_SIMPLE_BINARY_OPERATION(mul, first *second); +GKO_DEFINE_SIMPLE_BINARY_OPERATION(mul, first* second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(div, first / second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(mod, first % second); @@ -885,11 +884,11 @@ GKO_DEFINE_SIMPLE_BINARY_OPERATION(not_equal, first != second); // binary logical GKO_DEFINE_SIMPLE_BINARY_OPERATION(logical_or, first || second); -GKO_DEFINE_SIMPLE_BINARY_OPERATION(logical_and, first &&second); +GKO_DEFINE_SIMPLE_BINARY_OPERATION(logical_and, first&& second); // binary bitwise GKO_DEFINE_SIMPLE_BINARY_OPERATION(bitwise_or, first | second); -GKO_DEFINE_SIMPLE_BINARY_OPERATION(bitwise_and, first &second); 
+GKO_DEFINE_SIMPLE_BINARY_OPERATION(bitwise_and, first& second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(bitwise_xor, first ^ second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(left_shift, first << second); GKO_DEFINE_SIMPLE_BINARY_OPERATION(right_shift, first >> second); @@ -962,8 +961,8 @@ struct mmul_operation { "Both ranges need to have the same number of dimensions"); static constexpr size_type dimensionality = first_accessor::dimensionality; - GKO_ATTRIBUTES explicit mmul_operation(const FirstAccessor &first, - const SecondAccessor &second) + GKO_ATTRIBUTES explicit mmul_operation(const FirstAccessor& first, + const SecondAccessor& second) : first{first}, second{second} { GKO_ASSERT(first.length(1) == second.length(0)); @@ -972,9 +971,9 @@ struct mmul_operation { template - GKO_ATTRIBUTES auto operator()(const FirstDimension &row, - const SecondDimension &col, - const DimensionTypes &... rest) const + GKO_ATTRIBUTES auto operator()(const FirstDimension& row, + const SecondDimension& col, + const DimensionTypes&... rest) const -> decltype(std::declval()(row, 0, rest...) * std::declval()(0, col, rest...) + std::declval()(row, 1, rest...) * @@ -998,7 +997,7 @@ struct mmul_operation { } template - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const = delete; + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const = delete; const first_accessor first; const second_accessor second; diff --git a/include/ginkgo/core/base/range_accessors.hpp b/include/ginkgo/core/base/range_accessors.hpp index 142d5e931b8..f82781dac00 100644 --- a/include/ginkgo/core/base/range_accessors.hpp +++ b/include/ginkgo/core/base/range_accessors.hpp @@ -81,7 +81,7 @@ class row_major { /** * Type of underlying data storage. */ - using data_type = value_type *; + using data_type = value_type*; /** * Number of dimensions of the accessor. 
@@ -115,7 +115,7 @@ class row_major { * * @return data element at (row, col) */ - constexpr GKO_ATTRIBUTES value_type &operator()(size_type row, + constexpr GKO_ATTRIBUTES value_type& operator()(size_type row, size_type col) const { return GKO_ASSERT(row < lengths[0]), GKO_ASSERT(col < lengths[1]), @@ -130,8 +130,8 @@ class row_major { * * @return sub-range spanning the range (rows, cols) */ - constexpr GKO_ATTRIBUTES range operator()(const span &rows, - const span &cols) const + constexpr GKO_ATTRIBUTES range operator()(const span& rows, + const span& cols) const { return GKO_ASSERT(rows.is_valid()), GKO_ASSERT(cols.is_valid()), GKO_ASSERT(rows <= span{lengths[0]}), @@ -166,7 +166,7 @@ class row_major { * @param other other accessor */ template - GKO_ATTRIBUTES void copy_from(const OtherAccessor &other) const + GKO_ATTRIBUTES void copy_from(const OtherAccessor& other) const { for (size_type i = 0; i < lengths[0]; ++i) { for (size_type j = 0; j < lengths[1]; ++j) { diff --git a/include/ginkgo/core/base/std_extensions.hpp b/include/ginkgo/core/base/std_extensions.hpp index 166d8c68bf7..e8eb8ae0ec6 100644 --- a/include/ginkgo/core/base/std_extensions.hpp +++ b/include/ginkgo/core/base/std_extensions.hpp @@ -86,7 +86,7 @@ using decay_t = std::decay_t; // Kept for backward compatibility. template -constexpr bool greater(const T &&lhs, const T &&rhs) +constexpr bool greater(const T&& lhs, const T&& rhs) { return std::greater()(lhs, rhs); } @@ -94,7 +94,7 @@ constexpr bool greater(const T &&lhs, const T &&rhs) // Kept for backward compatibility. template -constexpr bool greater_equal(const T &&lhs, const T &&rhs) +constexpr bool greater_equal(const T&& lhs, const T&& rhs) { return std::greater_equal()(lhs, rhs); } @@ -102,7 +102,7 @@ constexpr bool greater_equal(const T &&lhs, const T &&rhs) // Kept for backward compatibility. 
template -constexpr bool less(const T &&lhs, const T &&rhs) +constexpr bool less(const T&& lhs, const T&& rhs) { return std::less()(lhs, rhs); } @@ -110,7 +110,7 @@ constexpr bool less(const T &&lhs, const T &&rhs) // Kept for backward compatibility. template -constexpr bool less_equal(const T &&lhs, const T &&rhs) +constexpr bool less_equal(const T&& lhs, const T&& rhs) { return std::less_equal()(lhs, rhs); } diff --git a/include/ginkgo/core/base/temporary_clone.hpp b/include/ginkgo/core/base/temporary_clone.hpp index 589a0ca9114..c80cc8524b4 100644 --- a/include/ginkgo/core/base/temporary_clone.hpp +++ b/include/ginkgo/core/base/temporary_clone.hpp @@ -65,7 +65,7 @@ namespace detail { template class copy_back_deleter { public: - using pointer = T *; + using pointer = T*; /** * Creates a new deleter object. @@ -95,7 +95,7 @@ class copy_back_deleter { template class copy_back_deleter { public: - using pointer = const T *; + using pointer = const T*; copy_back_deleter(pointer original) : original_{original} {} void operator()(pointer ptr) const { delete ptr; } @@ -108,7 +108,7 @@ class copy_back_deleter { template struct temporary_clone_helper { static std::unique_ptr create(std::shared_ptr exec, - T *ptr, bool) + T* ptr, bool) { return gko::clone(std::move(exec), ptr); } @@ -130,7 +130,7 @@ template class temporary_clone { public: using value_type = T; - using pointer = T *; + using pointer = T*; /** * Creates a temporary_clone. @@ -161,18 +161,18 @@ class temporary_clone { * * @return the object held by temporary_clone */ - T *get() const { return handle_.get(); } + T* get() const { return handle_.get(); } /** * Calls a method on the underlying object. 
* * @return the underlying object */ - T *operator->() const { return handle_.get(); } + T* operator->() const { return handle_.get(); } private: // std::function deleter allows to decide the (type of) deleter at runtime - using handle_type = std::unique_ptr>; + using handle_type = std::unique_ptr>; handle_type handle_; }; @@ -193,7 +193,7 @@ class temporary_clone { */ template detail::temporary_clone make_temporary_clone( - std::shared_ptr exec, T *ptr) + std::shared_ptr exec, T* ptr) { return detail::temporary_clone(std::move(exec), ptr); } @@ -213,7 +213,7 @@ detail::temporary_clone make_temporary_clone( */ template detail::temporary_clone make_temporary_output_clone( - std::shared_ptr exec, T *ptr) + std::shared_ptr exec, T* ptr) { static_assert( !std::is_const::value, diff --git a/include/ginkgo/core/base/temporary_conversion.hpp b/include/ginkgo/core/base/temporary_conversion.hpp index f651855e345..7bba0d9f522 100644 --- a/include/ginkgo/core/base/temporary_conversion.hpp +++ b/include/ginkgo/core/base/temporary_conversion.hpp @@ -68,8 +68,8 @@ namespace detail { template class convert_back_deleter { public: - using pointer = CopyType *; - using original_pointer = OrigType *; + using pointer = CopyType*; + using original_pointer = OrigType*; /** * Creates a new deleter object. 
@@ -99,8 +99,8 @@ class convert_back_deleter { template class convert_back_deleter { public: - using pointer = const CopyType *; - using original_pointer = const OrigType *; + using pointer = const CopyType*; + using original_pointer = const OrigType*; convert_back_deleter(original_pointer) {} void operator()(pointer ptr) const { delete ptr; } @@ -121,8 +121,8 @@ template struct conversion_helper { /** Dispatch convert_impl with the ConversionCandidates list */ template - static std::unique_ptr> - convert(MaybeConstLinOp *obj) + static std::unique_ptr> + convert(MaybeConstLinOp* obj) { return convert_impl(obj); @@ -135,15 +135,15 @@ struct conversion_helper { */ template - static std::unique_ptr> - convert_impl(MaybeConstLinOp *obj) + static std::unique_ptr> + convert_impl(MaybeConstLinOp* obj) { // make candidate_type conditionally const based on whether obj is const using candidate_type = std::conditional_t::value, const FirstCandidate, FirstCandidate>; - candidate_type *cast_obj{}; - if ((cast_obj = dynamic_cast(obj))) { + candidate_type* cast_obj{}; + if ((cast_obj = dynamic_cast(obj))) { // if the cast is successful, obj is of dynamic type candidate_type // so we can convert from this type to TargetType auto converted = TargetType::create(obj->get_executor()); @@ -169,8 +169,8 @@ struct conversion_helper { template <> struct conversion_helper<> { template - static std::unique_ptr> convert( - MaybeConstLinOp *obj) + static std::unique_ptr> convert( + MaybeConstLinOp* obj) { // return nullptr if no previous candidates matched return {nullptr, null_deleter{}}; @@ -194,7 +194,7 @@ template class temporary_conversion { public: using value_type = T; - using pointer = T *; + using pointer = T*; using lin_op_type = std::conditional_t::value, const LinOp, LinOp>; @@ -205,10 +205,10 @@ class temporary_conversion { * try out for converting ptr to type T. 
*/ template - static temporary_conversion create(lin_op_type *ptr) + static temporary_conversion create(lin_op_type* ptr) { - T *cast_ptr{}; - if ((cast_ptr = dynamic_cast(ptr))) { + T* cast_ptr{}; + if ((cast_ptr = dynamic_cast(ptr))) { return handle_type{cast_ptr, null_deleter{}}; } else { return conversion_helper::template convert< @@ -221,14 +221,14 @@ class temporary_conversion { * * @return the object held by temporary_conversion */ - T *get() const { return handle_.get(); } + T* get() const { return handle_.get(); } /** * Calls a method on the underlying object. * * @return the underlying object */ - T *operator->() const { return handle_.get(); } + T* operator->() const { return handle_.get(); } /** * Returns if the conversion was successful. @@ -238,7 +238,7 @@ class temporary_conversion { private: // std::function deleter allows to decide the (type of) deleter at // runtime - using handle_type = std::unique_ptr>; + using handle_type = std::unique_ptr>; temporary_conversion(handle_type handle) : handle_{std::move(handle)} {} diff --git a/include/ginkgo/core/base/utils_helper.hpp b/include/ginkgo/core/base/utils_helper.hpp index 51efb3d5ca7..d15dd601615 100644 --- a/include/ginkgo/core/base/utils_helper.hpp +++ b/include/ginkgo/core/base/utils_helper.hpp @@ -63,7 +63,7 @@ template struct pointee_impl {}; template -struct pointee_impl { +struct pointee_impl { using type = T; }; @@ -157,12 +157,12 @@ using shared_type = std::shared_ptr>; * object. 
*/ template -inline detail::cloned_type clone(const Pointer &p) +inline detail::cloned_type clone(const Pointer& p) { static_assert(detail::is_clonable>(), "Object is not clonable"); return detail::cloned_type( - static_cast>::type *>( + static_cast>::type*>( p->clone().release())); } @@ -184,12 +184,12 @@ inline detail::cloned_type clone(const Pointer &p) */ template inline detail::cloned_type clone(std::shared_ptr exec, - const Pointer &p) + const Pointer& p) { static_assert(detail::is_clonable_to>(), "Object is not clonable"); return detail::cloned_type( - static_cast>::type *>( + static_cast>::type*>( p->clone(std::move(exec)).release())); } @@ -207,7 +207,7 @@ inline detail::cloned_type clone(std::shared_ptr exec, * @note The original pointer `p` becomes invalid after this call. */ template -inline detail::shared_type share(OwningPointer &&p) +inline detail::shared_type share(OwningPointer&& p) { static_assert(detail::have_ownership(), "OwningPointer does not have ownership of the object"); @@ -228,8 +228,8 @@ inline detail::shared_type share(OwningPointer &&p) * @note The original pointer `p` becomes invalid after this call. */ template -inline typename std::remove_reference::type &&give( - OwningPointer &&p) +inline typename std::remove_reference::type&& give( + OwningPointer&& p) { static_assert(detail::have_ownership(), "OwningPointer does not have ownership of the object"); @@ -249,8 +249,8 @@ inline typename std::remove_reference::type &&give( */ template inline typename std::enable_if::value, - detail::pointee *>::type -lend(const Pointer &p) + detail::pointee*>::type +lend(const Pointer& p) { return p.get(); } @@ -267,8 +267,8 @@ lend(const Pointer &p) */ template inline typename std::enable_if::value, - detail::pointee *>::type -lend(const Pointer &p) + detail::pointee*>::type +lend(const Pointer& p) { return p; } @@ -286,9 +286,9 @@ lend(const Pointer &p) * NotSupported. 
*/ template -inline typename std::decay::type *as(U *obj) +inline typename std::decay::type* as(U* obj) { - if (auto p = dynamic_cast::type *>(obj)) { + if (auto p = dynamic_cast::type*>(obj)) { return p; } else { throw NotSupported(__FILE__, __LINE__, @@ -312,9 +312,9 @@ inline typename std::decay::type *as(U *obj) * NotSupported. */ template -inline const typename std::decay::type *as(const U *obj) +inline const typename std::decay::type* as(const U* obj) { - if (auto p = dynamic_cast::type *>(obj)) { + if (auto p = dynamic_cast::type*>(obj)) { return p; } else { throw NotSupported(__FILE__, __LINE__, @@ -339,9 +339,9 @@ inline const typename std::decay::type *as(const U *obj) */ template inline std::unique_ptr::type> as( - std::unique_ptr &&obj) + std::unique_ptr&& obj) { - if (auto p = dynamic_cast::type *>(obj.get())) { + if (auto p = dynamic_cast::type*>(obj.get())) { obj.release(); return std::unique_ptr::type>{p}; } else { @@ -412,7 +412,7 @@ inline std::shared_ptr::type> as( template class null_deleter { public: - using pointer = T *; + using pointer = T*; /** * Deletes the object. diff --git a/include/ginkgo/core/base/version.hpp b/include/ginkgo/core/base/version.hpp index bc9a7a478d8..2d41e366c0d 100644 --- a/include/ginkgo/core/base/version.hpp +++ b/include/ginkgo/core/base/version.hpp @@ -53,7 +53,7 @@ namespace gko { */ struct version { constexpr version(const uint64 major, const uint64 minor, - const uint64 patch, const char *tag) + const uint64 patch, const char* tag) : major{major}, minor{minor}, patch{patch}, tag{tag} {} @@ -77,21 +77,21 @@ struct version { * * It does not participate in comparisons. 
*/ - const char *const tag; + const char* const tag; }; -inline bool operator==(const version &first, const version &second) +inline bool operator==(const version& first, const version& second) { return first.major == second.major && first.minor == second.minor && first.patch == second.patch; } -inline bool operator!=(const version &first, const version &second) +inline bool operator!=(const version& first, const version& second) { return !(first == second); } -inline bool operator<(const version &first, const version &second) +inline bool operator<(const version& first, const version& second) { if (first.major < second.major) return true; if (first.major == second.major && first.minor < second.minor) return true; @@ -101,17 +101,17 @@ inline bool operator<(const version &first, const version &second) return false; } -inline bool operator<=(const version &first, const version &second) +inline bool operator<=(const version& first, const version& second) { return !(second < first); } -inline bool operator>(const version &first, const version &second) +inline bool operator>(const version& first, const version& second) { return second < first; } -inline bool operator>=(const version &first, const version &second) +inline bool operator>=(const version& first, const version& second) { return !(first < second); } @@ -127,7 +127,7 @@ inline bool operator>=(const version &first, const version &second) * * @return os */ -inline std::ostream &operator<<(std::ostream &os, const version &ver) +inline std::ostream& operator<<(std::ostream& os, const version& ver) { os << ver.major << "." << ver.minor << "." 
<< ver.patch; if (ver.tag) { @@ -165,7 +165,7 @@ class version_info { * * @return an instance of version info */ - static const version_info &get() + static const version_info& get() { static version_info info{}; return info; @@ -258,7 +258,7 @@ class version_info { * * @return os */ -std::ostream &operator<<(std::ostream &os, const version_info &ver_info); +std::ostream& operator<<(std::ostream& os, const version_info& ver_info); } // namespace gko diff --git a/include/ginkgo/core/factorization/ic.hpp b/include/ginkgo/core/factorization/ic.hpp index fb6da2d9764..032bb7881ee 100644 --- a/include/ginkgo/core/factorization/ic.hpp +++ b/include/ginkgo/core/factorization/ic.hpp @@ -96,7 +96,7 @@ class Ic : public Composition { // Remove the possibility of calling `create`, which was enabled by // `Composition` template - static std::unique_ptr> create(Args &&... args) = + static std::unique_ptr> create(Args&&... args) = delete; GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) @@ -131,7 +131,7 @@ class Ic : public Composition { GKO_ENABLE_BUILD_METHOD(Factory); protected: - Ic(const Factory *factory, std::shared_ptr system_matrix) + Ic(const Factory* factory, std::shared_ptr system_matrix) : Composition{factory->get_executor()}, parameters_{factory->get_parameters()} { @@ -145,7 +145,7 @@ class Ic : public Composition { } std::unique_ptr> generate( - const std::shared_ptr &system_matrix, bool skip_sorting, + const std::shared_ptr& system_matrix, bool skip_sorting, bool both_factors) const; }; diff --git a/include/ginkgo/core/factorization/ilu.hpp b/include/ginkgo/core/factorization/ilu.hpp index 4e0bbef4008..3656509ec9d 100644 --- a/include/ginkgo/core/factorization/ilu.hpp +++ b/include/ginkgo/core/factorization/ilu.hpp @@ -91,7 +91,7 @@ class Ilu : public Composition { // Remove the possibility of calling `create`, which was enabled by // `Composition` template - static std::unique_ptr> create(Args &&... args) = + static std::unique_ptr> create(Args&&... 
args) = delete; GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) @@ -126,7 +126,7 @@ class Ilu : public Composition { GKO_ENABLE_BUILD_METHOD(Factory); protected: - Ilu(const Factory *factory, std::shared_ptr system_matrix) + Ilu(const Factory* factory, std::shared_ptr system_matrix) : Composition{factory->get_executor()}, parameters_{factory->get_parameters()} { @@ -157,7 +157,7 @@ class Ilu : public Composition { * given system_matrix (first element is L, then U) */ std::unique_ptr> generate_l_u( - const std::shared_ptr &system_matrix, + const std::shared_ptr& system_matrix, bool skip_sorting) const; }; diff --git a/include/ginkgo/core/factorization/par_ic.hpp b/include/ginkgo/core/factorization/par_ic.hpp index ce94d3d32ae..7bc60c68b89 100644 --- a/include/ginkgo/core/factorization/par_ic.hpp +++ b/include/ginkgo/core/factorization/par_ic.hpp @@ -121,7 +121,7 @@ class ParIc : public Composition { // Remove the possibility of calling `create`, which was enabled by // `Composition` template - static std::unique_ptr> create(Args &&... args) = + static std::unique_ptr> create(Args&&... 
args) = delete; GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) @@ -164,7 +164,7 @@ class ParIc : public Composition { GKO_ENABLE_BUILD_METHOD(Factory); protected: - explicit ParIc(const Factory *factory, + explicit ParIc(const Factory* factory, std::shared_ptr system_matrix) : Composition(factory->get_executor()), parameters_{factory->get_parameters()} @@ -179,7 +179,7 @@ class ParIc : public Composition { } std::unique_ptr> generate( - const std::shared_ptr &system_matrix, bool skip_sorting, + const std::shared_ptr& system_matrix, bool skip_sorting, bool both_factors) const; }; diff --git a/include/ginkgo/core/factorization/par_ict.hpp b/include/ginkgo/core/factorization/par_ict.hpp index cc113b8c807..07179b25263 100644 --- a/include/ginkgo/core/factorization/par_ict.hpp +++ b/include/ginkgo/core/factorization/par_ict.hpp @@ -116,7 +116,7 @@ class ParIct : public Composition { // Remove the possibility of calling `create`, which was enabled by // `Composition` template - static std::unique_ptr> create(Args &&... args) = + static std::unique_ptr> create(Args&&... 
args) = delete; GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) @@ -213,7 +213,7 @@ class ParIct : public Composition { GKO_ENABLE_BUILD_METHOD(Factory); protected: - explicit ParIct(const Factory *factory, + explicit ParIct(const Factory* factory, std::shared_ptr system_matrix) : Composition(factory->get_executor()), parameters_{factory->get_parameters()} @@ -242,7 +242,7 @@ class ParIct : public Composition { * given system_matrix (first element is L, then L^T) */ std::unique_ptr> generate_l_lt( - const std::shared_ptr &system_matrix) const; + const std::shared_ptr& system_matrix) const; }; diff --git a/include/ginkgo/core/factorization/par_ilu.hpp b/include/ginkgo/core/factorization/par_ilu.hpp index bfd6884ebbf..18bf09696cd 100644 --- a/include/ginkgo/core/factorization/par_ilu.hpp +++ b/include/ginkgo/core/factorization/par_ilu.hpp @@ -119,7 +119,7 @@ class ParIlu : public Composition { // Remove the possibility of calling `create`, which was enabled by // `Composition` template - static std::unique_ptr> create(Args &&... args) = + static std::unique_ptr> create(Args&&... 
args) = delete; GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) @@ -162,7 +162,7 @@ class ParIlu : public Composition { GKO_ENABLE_BUILD_METHOD(Factory); protected: - explicit ParIlu(const Factory *factory, + explicit ParIlu(const Factory* factory, std::shared_ptr system_matrix) : Composition(factory->get_executor()), parameters_{factory->get_parameters()} @@ -198,7 +198,7 @@ class ParIlu : public Composition { * given system_matrix (first element is L, then U) */ std::unique_ptr> generate_l_u( - const std::shared_ptr &system_matrix, bool skip_sorting, + const std::shared_ptr& system_matrix, bool skip_sorting, std::shared_ptr l_strategy, std::shared_ptr u_strategy) const; }; diff --git a/include/ginkgo/core/factorization/par_ilut.hpp b/include/ginkgo/core/factorization/par_ilut.hpp index 0cb54dfa73d..69da41f7b2a 100644 --- a/include/ginkgo/core/factorization/par_ilut.hpp +++ b/include/ginkgo/core/factorization/par_ilut.hpp @@ -121,7 +121,7 @@ class ParIlut : public Composition { // Remove the possibility of calling `create`, which was enabled by // `Composition` template - static std::unique_ptr> create(Args &&... args) = + static std::unique_ptr> create(Args&&... 
args) = delete; GKO_CREATE_FACTORY_PARAMETERS(parameters, Factory) @@ -219,7 +219,7 @@ class ParIlut : public Composition { GKO_ENABLE_BUILD_METHOD(Factory); protected: - explicit ParIlut(const Factory *factory, + explicit ParIlut(const Factory* factory, std::shared_ptr system_matrix) : Composition(factory->get_executor()), parameters_{factory->get_parameters()} @@ -248,7 +248,7 @@ class ParIlut : public Composition { * given system_matrix (first element is L, then U) */ std::unique_ptr> generate_l_u( - const std::shared_ptr &system_matrix) const; + const std::shared_ptr& system_matrix) const; }; diff --git a/include/ginkgo/core/log/convergence.hpp b/include/ginkgo/core/log/convergence.hpp index b3ceb5781f9..e0bb0920cd7 100644 --- a/include/ginkgo/core/log/convergence.hpp +++ b/include/ginkgo/core/log/convergence.hpp @@ -66,19 +66,19 @@ template class Convergence : public Logger { public: void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *solution, const uint8 &stopping_id, - const bool &set_finalized, const Array *status, - const bool &one_changed, const bool &all_stopped) const override; + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* solution, const uint8& stopping_id, + const bool& set_finalized, const Array* status, + const bool& one_changed, const bool& all_stopped) const override; void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *implicit_sq_resnorm, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &one_changed, - const bool &all_stopped) const override; + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* 
residual_norm, + const LinOp* implicit_sq_resnorm, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& one_changed, + const bool& all_stopped) const override; /** * Creates a convergence logger. This dynamically allocates the memory, @@ -96,7 +96,7 @@ class Convergence : public Logger { */ static std::unique_ptr create( std::shared_ptr exec, - const mask_type &enabled_events = Logger::all_events_mask) + const mask_type& enabled_events = Logger::all_events_mask) { return std::unique_ptr( new Convergence(exec, enabled_events)); @@ -119,7 +119,7 @@ class Convergence : public Logger { * * @return the number of iterations */ - const size_type &get_num_iterations() const noexcept + const size_type& get_num_iterations() const noexcept { return num_iterations_; } @@ -129,14 +129,14 @@ class Convergence : public Logger { * * @return the residual */ - const LinOp *get_residual() const noexcept { return residual_.get(); } + const LinOp* get_residual() const noexcept { return residual_.get(); } /** * Returns the residual norm * * @return the residual norm */ - const LinOp *get_residual_norm() const noexcept + const LinOp* get_residual_norm() const noexcept { return residual_norm_.get(); } @@ -146,7 +146,7 @@ class Convergence : public Logger { * * @return the implicit squared residual norm */ - const LinOp *get_implicit_sq_resnorm() const noexcept + const LinOp* get_implicit_sq_resnorm() const noexcept { return implicit_sq_resnorm_.get(); } @@ -161,7 +161,7 @@ class Convergence : public Logger { */ explicit Convergence( std::shared_ptr exec, - const mask_type &enabled_events = Logger::all_events_mask) + const mask_type& enabled_events = Logger::all_events_mask) : Logger(exec, enabled_events) {} diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index 4e0a033189d..b4d2822bb35 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -130,7 +130,7 @@ 
protected: \ public: \ template \ std::enable_if_t on( \ - Params &&... params) const \ + Params&&... params) const \ { \ if (enabled_events_ & (mask_type{1} << _id)) { \ this->on_##_event_name(std::forward(params)...); \ @@ -145,8 +145,8 @@ public: \ * @param exec the executor used * @param num_bytes the number of bytes to allocate */ - GKO_LOGGER_REGISTER_EVENT(0, allocation_started, const Executor *exec, - const size_type &num_bytes) + GKO_LOGGER_REGISTER_EVENT(0, allocation_started, const Executor* exec, + const size_type& num_bytes) /** * Executor's allocation completed event. @@ -155,9 +155,9 @@ public: \ * @param num_bytes the number of bytes allocated * @param location the address at which the data was allocated */ - GKO_LOGGER_REGISTER_EVENT(1, allocation_completed, const Executor *exec, - const size_type &num_bytes, - const uintptr &location) + GKO_LOGGER_REGISTER_EVENT(1, allocation_completed, const Executor* exec, + const size_type& num_bytes, + const uintptr& location) /** * Executor's free started event. @@ -165,8 +165,8 @@ public: \ * @param exec the executor used * @param location the address at which the data will be freed */ - GKO_LOGGER_REGISTER_EVENT(2, free_started, const Executor *exec, - const uintptr &location) + GKO_LOGGER_REGISTER_EVENT(2, free_started, const Executor* exec, + const uintptr& location) /** * Executor's free completed event. @@ -174,8 +174,8 @@ public: \ * @param exec the executor used * @param location the address at which the data was freed */ - GKO_LOGGER_REGISTER_EVENT(3, free_completed, const Executor *exec, - const uintptr &location) + GKO_LOGGER_REGISTER_EVENT(3, free_completed, const Executor* exec, + const uintptr& location) /** * Executor's copy started event. 
@@ -186,9 +186,9 @@ public: \ * @param loc_to the address at which the data will be copied to * @param num_bytes the number of bytes to be copied */ - GKO_LOGGER_REGISTER_EVENT(4, copy_started, const Executor *exec_from, - const Executor *exec_to, const uintptr &loc_from, - const uintptr &loc_to, const size_type &num_bytes) + GKO_LOGGER_REGISTER_EVENT(4, copy_started, const Executor* exec_from, + const Executor* exec_to, const uintptr& loc_from, + const uintptr& loc_to, const size_type& num_bytes) /** * Executor's copy completed event. @@ -199,9 +199,9 @@ public: \ * @param loc_to the address at which the data was copied to * @param num_bytes the number of bytes copied */ - GKO_LOGGER_REGISTER_EVENT(5, copy_completed, const Executor *exec_from, - const Executor *exec_to, const uintptr &loc_from, - const uintptr &loc_to, const size_type &num_bytes) + GKO_LOGGER_REGISTER_EVENT(5, copy_completed, const Executor* exec_from, + const Executor* exec_to, const uintptr& loc_from, + const uintptr& loc_to, const size_type& num_bytes) /** * Executor's operation launched event (method run). @@ -209,8 +209,8 @@ public: \ * @param exec the executor used * @param op the operation launched */ - GKO_LOGGER_REGISTER_EVENT(6, operation_launched, const Executor *exec, - const Operation *op) + GKO_LOGGER_REGISTER_EVENT(6, operation_launched, const Executor* exec, + const Operation* op) /** * Executor's operation completed event (method run). @@ -223,8 +223,8 @@ public: \ * the loggers will do lightweight logging, and therefore this operation for * the GPU just notes that the Operation has been sent to the GPU. */ - GKO_LOGGER_REGISTER_EVENT(7, operation_completed, const Executor *exec, - const Operation *op) + GKO_LOGGER_REGISTER_EVENT(7, operation_completed, const Executor* exec, + const Operation* op) /** * PolymorphicObject's create started event. 
@@ -233,7 +233,7 @@ public: \ * @param po the PolymorphicObject to be created */ GKO_LOGGER_REGISTER_EVENT(8, polymorphic_object_create_started, - const Executor *exec, const PolymorphicObject *po) + const Executor* exec, const PolymorphicObject* po) /** * PolymorphicObject's create completed event. @@ -243,9 +243,9 @@ public: \ * @param output the PolymorphicObject which was created */ GKO_LOGGER_REGISTER_EVENT(9, polymorphic_object_create_completed, - const Executor *exec, - const PolymorphicObject *input, - const PolymorphicObject *output) + const Executor* exec, + const PolymorphicObject* input, + const PolymorphicObject* output) /** * PolymorphicObject's copy started event. @@ -255,9 +255,9 @@ public: \ * @param output the PolymorphicObject to be copied to */ GKO_LOGGER_REGISTER_EVENT(10, polymorphic_object_copy_started, - const Executor *exec, - const PolymorphicObject *input, - const PolymorphicObject *output) + const Executor* exec, + const PolymorphicObject* input, + const PolymorphicObject* output) /** * PolymorphicObject's copy completed event. @@ -267,9 +267,9 @@ public: \ * @param output the PolymorphicObject to be copied to */ GKO_LOGGER_REGISTER_EVENT(11, polymorphic_object_copy_completed, - const Executor *exec, - const PolymorphicObject *input, - const PolymorphicObject *output) + const Executor* exec, + const PolymorphicObject* input, + const PolymorphicObject* output) /** * PolymorphicObject's deleted event. @@ -278,7 +278,7 @@ public: \ * @param po the PolymorphicObject to be deleted */ GKO_LOGGER_REGISTER_EVENT(12, polymorphic_object_deleted, - const Executor *exec, const PolymorphicObject *po) + const Executor* exec, const PolymorphicObject* po) /** * LinOp's apply started event. 
@@ -287,8 +287,8 @@ public: \ * @param b the input vector(s) * @param x the output vector(s) */ - GKO_LOGGER_REGISTER_EVENT(13, linop_apply_started, const LinOp *A, - const LinOp *b, const LinOp *x) + GKO_LOGGER_REGISTER_EVENT(13, linop_apply_started, const LinOp* A, + const LinOp* b, const LinOp* x) /** * LinOp's apply completed event. @@ -297,8 +297,8 @@ public: \ * @param b the input vector(s) * @param x the output vector(s) */ - GKO_LOGGER_REGISTER_EVENT(14, linop_apply_completed, const LinOp *A, - const LinOp *b, const LinOp *x) + GKO_LOGGER_REGISTER_EVENT(14, linop_apply_completed, const LinOp* A, + const LinOp* b, const LinOp* x) /** * LinOp's advanced apply started event. @@ -309,9 +309,9 @@ public: \ * @param beta scaling of the input x * @param x the output vector(s) */ - GKO_LOGGER_REGISTER_EVENT(15, linop_advanced_apply_started, const LinOp *A, - const LinOp *alpha, const LinOp *b, - const LinOp *beta, const LinOp *x) + GKO_LOGGER_REGISTER_EVENT(15, linop_advanced_apply_started, const LinOp* A, + const LinOp* alpha, const LinOp* b, + const LinOp* beta, const LinOp* x) /** * LinOp's advanced apply completed event. @@ -323,8 +323,8 @@ public: \ * @param x the output vector(s) */ GKO_LOGGER_REGISTER_EVENT(16, linop_advanced_apply_completed, - const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, const LinOp *x) + const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, const LinOp* x) /** * LinOp Factory's generate started event. @@ -334,7 +334,7 @@ public: \ * a system matrix) */ GKO_LOGGER_REGISTER_EVENT(17, linop_factory_generate_started, - const LinOpFactory *factory, const LinOp *input) + const LinOpFactory* factory, const LinOp* input) /** * LinOp Factory's generate completed event. 
@@ -345,8 +345,8 @@ public: \ * @param output the generated LinOp object */ GKO_LOGGER_REGISTER_EVENT(18, linop_factory_generate_completed, - const LinOpFactory *factory, const LinOp *input, - const LinOp *output) + const LinOpFactory* factory, const LinOp* input, + const LinOp* output) /** * stop::Criterion's check started event. @@ -360,11 +360,11 @@ public: \ * @param set_finalized whether this finalizes the iteration */ GKO_LOGGER_REGISTER_EVENT(19, criterion_check_started, - const stop::Criterion *criterion, - const size_type &it, const LinOp *r, - const LinOp *tau, const LinOp *x, - const uint8 &stopping_id, - const bool &set_finalized) + const stop::Criterion* criterion, + const size_type& it, const LinOp* r, + const LinOp* tau, const LinOp* x, + const uint8& stopping_id, + const bool& set_finalized) /** * stop::Criterion's check completed event. Parameters are the Criterion, @@ -387,11 +387,11 @@ public: \ * parameter as below. */ GKO_LOGGER_REGISTER_EVENT( - 20, criterion_check_completed, const stop::Criterion *criterion, - const size_type &it, const LinOp *r, const LinOp *tau, const LinOp *x, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &one_changed, - const bool &all_converged) + 20, criterion_check_completed, const stop::Criterion* criterion, + const size_type& it, const LinOp* r, const LinOp* tau, const LinOp* x, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& one_changed, + const bool& all_converged) protected: /** * stop::Criterion's check completed event. 
Parameters are the Criterion, @@ -411,11 +411,11 @@ public: \ * @param all_converged whether all right hand sides */ virtual void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &it, const LinOp *r, - const LinOp *tau, const LinOp *implicit_tau_sq, const LinOp *x, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &one_changed, - const bool &all_converged) const + const stop::Criterion* criterion, const size_type& it, const LinOp* r, + const LinOp* tau, const LinOp* implicit_tau_sq, const LinOp* x, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& one_changed, + const bool& all_converged) const { this->on_criterion_check_completed(criterion, it, r, tau, x, stopping_id, set_finalized, status, @@ -435,10 +435,10 @@ public: \ * deprecated. Please use the one with the additional implicit_tau_sq * parameter as below. */ - GKO_LOGGER_REGISTER_EVENT(21, iteration_complete, const LinOp *solver, - const size_type &it, const LinOp *r, - const LinOp *x = nullptr, - const LinOp *tau = nullptr) + GKO_LOGGER_REGISTER_EVENT(21, iteration_complete, const LinOp* solver, + const size_type& it, const LinOp* r, + const LinOp* x = nullptr, + const LinOp* tau = nullptr) protected: /** * Register the `iteration_complete` event which logs every completed @@ -450,10 +450,10 @@ public: \ * @param tau the residual norm (optional) * @param implicit_tau_sq the implicit residual norm squared (optional) */ - virtual void on_iteration_complete(const LinOp *solver, const size_type &it, - const LinOp *r, const LinOp *x, - const LinOp *tau, - const LinOp *implicit_tau_sq) const + virtual void on_iteration_complete(const LinOp* solver, const size_type& it, + const LinOp* r, const LinOp* x, + const LinOp* tau, + const LinOp* implicit_tau_sq) const { this->on_iteration_complete(solver, it, r, x, tau); } @@ -521,7 +521,7 @@ public: \ * event. 
*/ explicit Logger(std::shared_ptr exec, - const mask_type &enabled_events = all_events_mask) + const mask_type& enabled_events = all_events_mask) : exec_{exec}, enabled_events_{enabled_events} {} @@ -556,14 +556,14 @@ class Loggable { * Thus, two loggers constructed in the same way are not considered * equal. */ - virtual void remove_logger(const Logger *logger) = 0; + virtual void remove_logger(const Logger* logger) = 0; /** * Returns the vector containing all loggers registered at this object. * * @return the vector containing all registered loggers. */ - virtual const std::vector> &get_loggers() + virtual const std::vector>& get_loggers() const = 0; /** Remove all loggers registered at this object. */ @@ -591,11 +591,11 @@ class EnableLogging : public PolymorphicBase { loggers_.push_back(logger); } - void remove_logger(const Logger *logger) override + void remove_logger(const Logger* logger) override { auto idx = find_if(begin(loggers_), end(loggers_), - [&logger](const auto &l) { return lend(l) == logger; }); + [&logger](const auto& l) { return lend(l) == logger; }); if (idx != end(loggers_)) { loggers_.erase(idx); } else { @@ -604,7 +604,7 @@ class EnableLogging : public PolymorphicBase { } } - const std::vector> &get_loggers() + const std::vector>& get_loggers() const override { return loggers_; @@ -614,9 +614,9 @@ class EnableLogging : public PolymorphicBase { protected: template - void log(Params &&... params) const + void log(Params&&... 
params) const { - for (auto &logger : loggers_) { + for (auto& logger : loggers_) { logger->template on(std::forward(params)...); } } diff --git a/include/ginkgo/core/log/papi.hpp b/include/ginkgo/core/log/papi.hpp index 436dc59d1ef..ce7f6da69b4 100644 --- a/include/ginkgo/core/log/papi.hpp +++ b/include/ginkgo/core/log/papi.hpp @@ -92,96 +92,96 @@ template class Papi : public Logger { public: /* Executor events */ - void on_allocation_started(const Executor *exec, - const size_type &num_bytes) const override; + void on_allocation_started(const Executor* exec, + const size_type& num_bytes) const override; - void on_allocation_completed(const Executor *exec, - const size_type &num_bytes, - const uintptr &location) const override; + void on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const override; - void on_free_started(const Executor *exec, - const uintptr &location) const override; + void on_free_started(const Executor* exec, + const uintptr& location) const override; - void on_free_completed(const Executor *exec, - const uintptr &location) const override; + void on_free_completed(const Executor* exec, + const uintptr& location) const override; - void on_copy_started(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const override; + void on_copy_started(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const override; - void on_copy_completed(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const override; + void on_copy_completed(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const override; /* Operation events */ - void on_operation_launched(const Executor *exec, - const 
Operation *operation) const override; + void on_operation_launched(const Executor* exec, + const Operation* operation) const override; - void on_operation_completed(const Executor *exec, - const Operation *operation) const override; + void on_operation_completed(const Executor* exec, + const Operation* operation) const override; /* PolymorphicObject events */ void on_polymorphic_object_create_started( - const Executor *, const PolymorphicObject *po) const override; + const Executor*, const PolymorphicObject* po) const override; void on_polymorphic_object_create_completed( - const Executor *exec, const PolymorphicObject *input, - const PolymorphicObject *output) const override; + const Executor* exec, const PolymorphicObject* input, + const PolymorphicObject* output) const override; void on_polymorphic_object_copy_started( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const override; + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const override; void on_polymorphic_object_copy_completed( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const override; + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const override; void on_polymorphic_object_deleted( - const Executor *exec, const PolymorphicObject *po) const override; + const Executor* exec, const PolymorphicObject* po) const override; /* LinOp events */ - void on_linop_apply_started(const LinOp *A, const LinOp *b, - const LinOp *x) const override; + void on_linop_apply_started(const LinOp* A, const LinOp* b, + const LinOp* x) const override; - void on_linop_apply_completed(const LinOp *A, const LinOp *b, - const LinOp *x) const override; + void on_linop_apply_completed(const LinOp* A, const LinOp* b, + const LinOp* x) const override; - void on_linop_advanced_apply_started(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) 
const override; + void on_linop_advanced_apply_started(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const override; - void on_linop_advanced_apply_completed(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) const override; + void on_linop_advanced_apply_completed(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const override; /* LinOpFactory events */ - void on_linop_factory_generate_started(const LinOpFactory *factory, - const LinOp *input) const override; + void on_linop_factory_generate_started(const LinOpFactory* factory, + const LinOp* input) const override; void on_linop_factory_generate_completed( - const LinOpFactory *factory, const LinOp *input, - const LinOp *output) const override; + const LinOpFactory* factory, const LinOp* input, + const LinOp* output) const override; void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *solutino, const uint8 &stopping_id, - const bool &set_finalized, const Array *status, - const bool &one_changed, const bool &all_converged) const override; + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* solutino, const uint8& stopping_id, + const bool& set_finalized, const Array* status, + const bool& one_changed, const bool& all_converged) const override; /* Internal solver events */ void on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, - const LinOp *residual, const LinOp *solution = nullptr, - const LinOp *residual_norm = nullptr) const override; + const LinOp* solver, const size_type& num_iterations, + const LinOp* residual, const LinOp* solution = nullptr, + const LinOp* residual_norm = nullptr) const override; void on_iteration_complete( - const LinOp *solver, const 
size_type &num_iterations, - const LinOp *residual, const LinOp *solution, - const LinOp *residual_norm, - const LinOp *implicit_sq_residual_norm) const override; + const LinOp* solver, const size_type& num_iterations, + const LinOp* residual, const LinOp* solution, + const LinOp* residual_norm, + const LinOp* implicit_sq_residual_norm) const override; /** * Creates a Papi Logger. @@ -191,7 +191,7 @@ class Papi : public Logger { */ static std::shared_ptr create( std::shared_ptr exec, - const Logger::mask_type &enabled_events = Logger::all_events_mask) + const Logger::mask_type& enabled_events = Logger::all_events_mask) { return std::shared_ptr(new Papi(exec, enabled_events)); } @@ -207,7 +207,7 @@ class Papi : public Logger { protected: explicit Papi( std::shared_ptr exec, - const Logger::mask_type &enabled_events = Logger::all_events_mask) + const Logger::mask_type& enabled_events = Logger::all_events_mask) : Logger(exec, enabled_events) { std::ostringstream os; @@ -223,7 +223,7 @@ class Papi : public Logger { template class papi_queue { public: - papi_queue(papi_handle_t *handle, const char *counter_name) + papi_queue(papi_handle_t* handle, const char* counter_name) : handle{handle}, counter_name{counter_name} {} @@ -239,13 +239,13 @@ class Papi : public Logger { data.clear(); } - size_type &get_counter(const PointerType *ptr) + size_type& get_counter(const PointerType* ptr) { const auto tmp = reinterpret_cast(ptr); if (data.find(tmp) == data.end()) { data[tmp] = 0; } - auto &value = data[tmp]; + auto& value = data[tmp]; if (!value) { std::ostringstream oss; oss << counter_name << "::" << tmp; @@ -257,8 +257,8 @@ class Papi : public Logger { } private: - papi_handle_t *handle; - const char *counter_name; + papi_handle_t* handle; + const char* counter_name; std::map data; }; @@ -308,7 +308,7 @@ class Papi : public Logger { mutable papi_queue linop_advanced_apply_completed{ &papi_handle, "linop_advanced_apply_completed"}; - mutable std::map 
criterion_check_completed; + mutable std::map criterion_check_completed; mutable papi_queue iteration_complete{&papi_handle, "iteration_complete"}; diff --git a/include/ginkgo/core/log/record.hpp b/include/ginkgo/core/log/record.hpp index 71b31e9e794..6b48edd44df 100644 --- a/include/ginkgo/core/log/record.hpp +++ b/include/ginkgo/core/log/record.hpp @@ -64,11 +64,11 @@ struct iteration_complete_data { std::unique_ptr residual_norm; std::unique_ptr implicit_sq_residual_norm; - iteration_complete_data(const LinOp *solver, const size_type num_iterations, - const LinOp *residual = nullptr, - const LinOp *solution = nullptr, - const LinOp *residual_norm = nullptr, - const LinOp *implicit_sq_residual_norm = nullptr) + iteration_complete_data(const LinOp* solver, const size_type num_iterations, + const LinOp* residual = nullptr, + const LinOp* solution = nullptr, + const LinOp* residual_norm = nullptr, + const LinOp* implicit_sq_residual_norm = nullptr) : solver{nullptr}, num_iterations{num_iterations}, residual{nullptr}, @@ -98,7 +98,7 @@ struct iteration_complete_data { * Struct representing Executor related data */ struct executor_data { - const Executor *exec; + const Executor* exec; const size_type num_bytes; const uintptr location; }; @@ -108,8 +108,8 @@ struct executor_data { * Struct representing Operator related data */ struct operation_data { - const Executor *exec; - const Operation *operation; + const Executor* exec; + const Operation* operation; }; @@ -117,13 +117,13 @@ struct operation_data { * Struct representing PolymorphicObject related data */ struct polymorphic_object_data { - const Executor *exec; + const Executor* exec; std::unique_ptr input; std::unique_ptr output; // optional - polymorphic_object_data(const Executor *exec, - const PolymorphicObject *input, - const PolymorphicObject *output = nullptr) + polymorphic_object_data(const Executor* exec, + const PolymorphicObject* input, + const PolymorphicObject* output = nullptr) : exec{exec} { 
this->input = input->clone(); @@ -144,8 +144,8 @@ struct linop_data { std::unique_ptr beta; std::unique_ptr x; - linop_data(const LinOp *A, const LinOp *alpha, const LinOp *b, - const LinOp *beta, const LinOp *x) + linop_data(const LinOp* A, const LinOp* alpha, const LinOp* b, + const LinOp* beta, const LinOp* x) { this->A = A->clone(); if (alpha != nullptr) { @@ -164,12 +164,12 @@ struct linop_data { * Struct representing LinOp factory related data */ struct linop_factory_data { - const LinOpFactory *factory; + const LinOpFactory* factory; std::unique_ptr input; std::unique_ptr output; - linop_factory_data(const LinOpFactory *factory, const LinOp *input, - const LinOp *output) + linop_factory_data(const LinOpFactory* factory, const LinOp* input, + const LinOp* output) : factory{factory} { this->input = input->clone(); @@ -184,22 +184,22 @@ struct linop_factory_data { * Struct representing Criterion related data */ struct criterion_data { - const stop::Criterion *criterion; + const stop::Criterion* criterion; const size_type num_iterations; std::unique_ptr residual; std::unique_ptr residual_norm; std::unique_ptr solution; const uint8 stopping_id; const bool set_finalized; - const Array *status; + const Array* status; const bool oneChanged; const bool converged; - criterion_data(const stop::Criterion *criterion, - const size_type &num_iterations, const LinOp *residual, - const LinOp *residual_norm, const LinOp *solution, + criterion_data(const stop::Criterion* criterion, + const size_type& num_iterations, const LinOp* residual, + const LinOp* residual_norm, const LinOp* solution, const uint8 stopping_id, const bool set_finalized, - const Array *status = nullptr, + const Array* status = nullptr, const bool oneChanged = false, const bool converged = false) : criterion{criterion}, num_iterations{num_iterations}, @@ -282,113 +282,113 @@ class Record : public Logger { }; /* Executor events */ - void on_allocation_started(const Executor *exec, - const size_type 
&num_bytes) const override; + void on_allocation_started(const Executor* exec, + const size_type& num_bytes) const override; - void on_allocation_completed(const Executor *exec, - const size_type &num_bytes, - const uintptr &location) const override; + void on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const override; - void on_free_started(const Executor *exec, - const uintptr &location) const override; + void on_free_started(const Executor* exec, + const uintptr& location) const override; - void on_free_completed(const Executor *exec, - const uintptr &location) const override; + void on_free_completed(const Executor* exec, + const uintptr& location) const override; - void on_copy_started(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const override; + void on_copy_started(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const override; - void on_copy_completed(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const override; + void on_copy_completed(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const override; /* Operation events */ - void on_operation_launched(const Executor *exec, - const Operation *operation) const override; + void on_operation_launched(const Executor* exec, + const Operation* operation) const override; - void on_operation_completed(const Executor *exec, - const Operation *operation) const override; + void on_operation_completed(const Executor* exec, + const Operation* operation) const override; /* PolymorphicObject events */ void on_polymorphic_object_create_started( - const Executor *exec, const PolymorphicObject *po) const override; + const 
Executor* exec, const PolymorphicObject* po) const override; void on_polymorphic_object_create_completed( - const Executor *exec, const PolymorphicObject *input, - const PolymorphicObject *output) const override; + const Executor* exec, const PolymorphicObject* input, + const PolymorphicObject* output) const override; void on_polymorphic_object_copy_started( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const override; + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const override; void on_polymorphic_object_copy_completed( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const override; + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const override; void on_polymorphic_object_deleted( - const Executor *exec, const PolymorphicObject *po) const override; + const Executor* exec, const PolymorphicObject* po) const override; /* LinOp events */ - void on_linop_apply_started(const LinOp *A, const LinOp *b, - const LinOp *x) const override; + void on_linop_apply_started(const LinOp* A, const LinOp* b, + const LinOp* x) const override; - void on_linop_apply_completed(const LinOp *A, const LinOp *b, - const LinOp *x) const override; + void on_linop_apply_completed(const LinOp* A, const LinOp* b, + const LinOp* x) const override; - void on_linop_advanced_apply_started(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) const override; + void on_linop_advanced_apply_started(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const override; - void on_linop_advanced_apply_completed(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) const override; + void on_linop_advanced_apply_completed(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const override; /* LinOpFactory 
events */ - void on_linop_factory_generate_started(const LinOpFactory *factory, - const LinOp *input) const override; + void on_linop_factory_generate_started(const LinOpFactory* factory, + const LinOp* input) const override; void on_linop_factory_generate_completed( - const LinOpFactory *factory, const LinOp *input, - const LinOp *output) const override; + const LinOpFactory* factory, const LinOp* input, + const LinOp* output) const override; /* Criterion events */ - void on_criterion_check_started(const stop::Criterion *criterion, - const size_type &num_iterations, - const LinOp *residual, - const LinOp *residual_norm, - const LinOp *solution, - const uint8 &stopping_id, - const bool &set_finalized) const override; + void on_criterion_check_started(const stop::Criterion* criterion, + const size_type& num_iterations, + const LinOp* residual, + const LinOp* residual_norm, + const LinOp* solution, + const uint8& stopping_id, + const bool& set_finalized) const override; void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *implicit_residual_norm_sq, const LinOp *solution, - const uint8 &stopping_id, const bool &set_finalized, - const Array *status, const bool &one_changed, - const bool &all_converged) const override; + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* implicit_residual_norm_sq, const LinOp* solution, + const uint8& stopping_id, const bool& set_finalized, + const Array* status, const bool& one_changed, + const bool& all_converged) const override; void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, - const LinOp *residual, const LinOp *residual_norm, - const LinOp *solution, const uint8 &stopping_id, - const bool &set_finalized, const Array *status, - const bool &one_changed, const bool &all_converged) 
const override; + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* solution, const uint8& stopping_id, + const bool& set_finalized, const Array* status, + const bool& one_changed, const bool& all_converged) const override; /* Internal solver events */ void on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, - const LinOp *residual, const LinOp *solution = nullptr, - const LinOp *residual_norm = nullptr) const override; + const LinOp* solver, const size_type& num_iterations, + const LinOp* residual, const LinOp* solution = nullptr, + const LinOp* residual_norm = nullptr) const override; void on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, - const LinOp *residual, const LinOp *solution, - const LinOp *residual_norm, - const LinOp *implicit_sq_residual_norm) const override; + const LinOp* solver, const size_type& num_iterations, + const LinOp* residual, const LinOp* solution, + const LinOp* residual_norm, + const LinOp* implicit_sq_residual_norm) const override; /** * Creates a Record logger. This dynamically allocates the memory, @@ -410,7 +410,7 @@ class Record : public Logger { */ static std::unique_ptr create( std::shared_ptr exec, - const mask_type &enabled_events = Logger::all_events_mask, + const mask_type& enabled_events = Logger::all_events_mask, size_type max_storage = 1) { return std::unique_ptr( @@ -422,12 +422,12 @@ class Record : public Logger { * * @return the logged data */ - const logged_data &get() const noexcept { return data_; } + const logged_data& get() const noexcept { return data_; } /** * @copydoc ::get() */ - logged_data &get() noexcept { return data_; } + logged_data& get() noexcept { return data_; } protected: /** @@ -442,7 +442,7 @@ class Record : public Logger { * memory overhead of this logger. 
*/ explicit Record(std::shared_ptr exec, - const mask_type &enabled_events = Logger::all_events_mask, + const mask_type& enabled_events = Logger::all_events_mask, size_type max_storage = 0) : Logger(exec, enabled_events), max_storage_{max_storage} {} @@ -456,7 +456,7 @@ class Record : public Logger { * @param object the object to append */ template - void append_deque(std::deque &deque, deque_type object) const + void append_deque(std::deque& deque, deque_type object) const { if (this->max_storage_ && deque.size() == this->max_storage_) { deque.pop_front(); diff --git a/include/ginkgo/core/log/stream.hpp b/include/ginkgo/core/log/stream.hpp index 664ee5a47e4..be7dd2e5d79 100644 --- a/include/ginkgo/core/log/stream.hpp +++ b/include/ginkgo/core/log/stream.hpp @@ -59,105 +59,105 @@ template class Stream : public Logger { public: /* Executor events */ - void on_allocation_started(const Executor *exec, - const size_type &num_bytes) const override; + void on_allocation_started(const Executor* exec, + const size_type& num_bytes) const override; - void on_allocation_completed(const Executor *exec, - const size_type &num_bytes, - const uintptr &location) const override; + void on_allocation_completed(const Executor* exec, + const size_type& num_bytes, + const uintptr& location) const override; - void on_free_started(const Executor *exec, - const uintptr &location) const override; + void on_free_started(const Executor* exec, + const uintptr& location) const override; - void on_free_completed(const Executor *exec, - const uintptr &location) const override; + void on_free_completed(const Executor* exec, + const uintptr& location) const override; - void on_copy_started(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const override; + void on_copy_started(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const 
override; - void on_copy_completed(const Executor *from, const Executor *to, - const uintptr &location_from, - const uintptr &location_to, - const size_type &num_bytes) const override; + void on_copy_completed(const Executor* from, const Executor* to, + const uintptr& location_from, + const uintptr& location_to, + const size_type& num_bytes) const override; /* Operation events */ - void on_operation_launched(const Executor *exec, - const Operation *operation) const override; + void on_operation_launched(const Executor* exec, + const Operation* operation) const override; - void on_operation_completed(const Executor *exec, - const Operation *operation) const override; + void on_operation_completed(const Executor* exec, + const Operation* operation) const override; /* PolymorphicObject events */ void on_polymorphic_object_create_started( - const Executor *, const PolymorphicObject *po) const override; + const Executor*, const PolymorphicObject* po) const override; void on_polymorphic_object_create_completed( - const Executor *exec, const PolymorphicObject *input, - const PolymorphicObject *output) const override; + const Executor* exec, const PolymorphicObject* input, + const PolymorphicObject* output) const override; void on_polymorphic_object_copy_started( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const override; + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const override; void on_polymorphic_object_copy_completed( - const Executor *exec, const PolymorphicObject *from, - const PolymorphicObject *to) const override; + const Executor* exec, const PolymorphicObject* from, + const PolymorphicObject* to) const override; void on_polymorphic_object_deleted( - const Executor *exec, const PolymorphicObject *po) const override; + const Executor* exec, const PolymorphicObject* po) const override; /* LinOp events */ - void on_linop_apply_started(const LinOp *A, const LinOp *b, - const LinOp *x) 
const override; + void on_linop_apply_started(const LinOp* A, const LinOp* b, + const LinOp* x) const override; - void on_linop_apply_completed(const LinOp *A, const LinOp *b, - const LinOp *x) const override; + void on_linop_apply_completed(const LinOp* A, const LinOp* b, + const LinOp* x) const override; - void on_linop_advanced_apply_started(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) const override; + void on_linop_advanced_apply_started(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const override; - void on_linop_advanced_apply_completed(const LinOp *A, const LinOp *alpha, - const LinOp *b, const LinOp *beta, - const LinOp *x) const override; + void on_linop_advanced_apply_completed(const LinOp* A, const LinOp* alpha, + const LinOp* b, const LinOp* beta, + const LinOp* x) const override; /* LinOpFactory events */ - void on_linop_factory_generate_started(const LinOpFactory *factory, - const LinOp *input) const override; + void on_linop_factory_generate_started(const LinOpFactory* factory, + const LinOp* input) const override; void on_linop_factory_generate_completed( - const LinOpFactory *factory, const LinOp *input, - const LinOp *output) const override; + const LinOpFactory* factory, const LinOp* input, + const LinOp* output) const override; /* Criterion events */ - void on_criterion_check_started(const stop::Criterion *criterion, - const size_type &num_iterations, - const LinOp *residual, - const LinOp *residual_norm, - const LinOp *solution, - const uint8 &stopping_id, - const bool &set_finalized) const override; + void on_criterion_check_started(const stop::Criterion* criterion, + const size_type& num_iterations, + const LinOp* residual, + const LinOp* residual_norm, + const LinOp* solution, + const uint8& stopping_id, + const bool& set_finalized) const override; void on_criterion_check_completed( - const stop::Criterion *criterion, const size_type &num_iterations, 
- const LinOp *residual, const LinOp *residual_norm, - const LinOp *solutino, const uint8 &stopping_id, - const bool &set_finalized, const Array *status, - const bool &one_changed, const bool &all_converged) const override; + const stop::Criterion* criterion, const size_type& num_iterations, + const LinOp* residual, const LinOp* residual_norm, + const LinOp* solutino, const uint8& stopping_id, + const bool& set_finalized, const Array* status, + const bool& one_changed, const bool& all_converged) const override; /* Internal solver events */ void on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, - const LinOp *residual, const LinOp *solution = nullptr, - const LinOp *residual_norm = nullptr) const override; + const LinOp* solver, const size_type& num_iterations, + const LinOp* residual, const LinOp* solution = nullptr, + const LinOp* residual_norm = nullptr) const override; void on_iteration_complete( - const LinOp *solver, const size_type &num_iterations, - const LinOp *residual, const LinOp *solution, - const LinOp *residual_norm, - const LinOp *implicit_sq_residual_norm) const override; + const LinOp* solver, const size_type& num_iterations, + const LinOp* residual, const LinOp* solution, + const LinOp* residual_norm, + const LinOp* implicit_sq_residual_norm) const override; /** * Creates a Stream logger. 
This dynamically allocates the memory, @@ -179,8 +179,8 @@ class Stream : public Logger { */ static std::unique_ptr create( std::shared_ptr exec, - const Logger::mask_type &enabled_events = Logger::all_events_mask, - std::ostream &os = std::cout, bool verbose = false) + const Logger::mask_type& enabled_events = Logger::all_events_mask, + std::ostream& os = std::cout, bool verbose = false) { return std::unique_ptr( new Stream(exec, enabled_events, os, verbose)); @@ -200,15 +200,15 @@ class Stream : public Logger { */ explicit Stream( std::shared_ptr exec, - const Logger::mask_type &enabled_events = Logger::all_events_mask, - std::ostream &os = std::cout, bool verbose = false) + const Logger::mask_type& enabled_events = Logger::all_events_mask, + std::ostream& os = std::cout, bool verbose = false) : Logger(exec, enabled_events), os_(os), verbose_(verbose) {} private: - std::ostream &os_; - static constexpr const char *prefix_ = "[LOG] >>> "; + std::ostream& os_; + static constexpr const char* prefix_ = "[LOG] >>> "; bool verbose_; }; diff --git a/include/ginkgo/core/matrix/coo.hpp b/include/ginkgo/core/matrix/coo.hpp index f0872f32787..ac2a501799c 100644 --- a/include/ginkgo/core/matrix/coo.hpp +++ b/include/ginkgo/core/matrix/coo.hpp @@ -104,21 +104,21 @@ class Coo : public EnableLinOp>, friend class Coo, IndexType>; void convert_to( - Coo, IndexType> *result) const override; + Coo, IndexType>* result) const override; - void move_to(Coo, IndexType> *result) override; + void move_to(Coo, IndexType>* result) override; - void convert_to(Csr *other) const override; + void convert_to(Csr* other) const override; - void move_to(Csr *other) override; + void move_to(Csr* other) override; - void convert_to(Dense *other) const override; + void convert_to(Dense* other) const override; - void move_to(Dense *other) override; + void move_to(Dense* other) override; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) 
const override; + void write(mat_data& data) const override; std::unique_ptr> extract_diagonal() const override; @@ -131,7 +131,7 @@ class Coo : public EnableLinOp>, * * @return the values of the matrix. */ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc Csr::get_values() @@ -140,7 +140,7 @@ class Coo : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -150,7 +150,7 @@ class Coo : public EnableLinOp>, * * @return the column indexes of the matrix. */ - index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } /** * @copydoc Csr::get_col_idxs() @@ -159,7 +159,7 @@ class Coo : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_col_idxs() const noexcept + const index_type* get_const_col_idxs() const noexcept { return col_idxs_.get_const_data(); } @@ -169,7 +169,7 @@ class Coo : public EnableLinOp>, * * @return the row indexes of the matrix. */ - index_type *get_row_idxs() noexcept { return row_idxs_.get_data(); } + index_type* get_row_idxs() noexcept { return row_idxs_.get_data(); } /** * @copydoc Csr::get_row_idxs() @@ -178,7 +178,7 @@ class Coo : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const index_type *get_const_row_idxs() const noexcept + const index_type* get_const_row_idxs() const noexcept { return row_idxs_.get_const_data(); } @@ -203,7 +203,7 @@ class Coo : public EnableLinOp>, * * @return this */ - LinOp *apply2(const LinOp *b, LinOp *x) + LinOp* apply2(const LinOp* b, LinOp* x) { this->validate_application_parameters(b, x); auto exec = this->get_executor(); @@ -215,7 +215,7 @@ class Coo : public EnableLinOp>, /** * @copydoc apply2(cost LinOp *, LinOp *) */ - const LinOp *apply2(const LinOp *b, LinOp *x) const + const LinOp* apply2(const LinOp* b, LinOp* x) const { this->validate_application_parameters(b, x); auto exec = this->get_executor(); @@ -233,7 +233,7 @@ class Coo : public EnableLinOp>, * * @return this */ - LinOp *apply2(const LinOp *alpha, const LinOp *b, LinOp *x) + LinOp* apply2(const LinOp* alpha, const LinOp* b, LinOp* x) { this->validate_application_parameters(b, x); GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1)); @@ -247,7 +247,7 @@ class Coo : public EnableLinOp>, /** * @copydoc apply2(const LinOp *, const LinOp *, LinOp *) */ - const LinOp *apply2(const LinOp *alpha, const LinOp *b, LinOp *x) const + const LinOp* apply2(const LinOp* alpha, const LinOp* b, LinOp* x) const { this->validate_application_parameters(b, x); GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1)); @@ -266,7 +266,7 @@ class Coo : public EnableLinOp>, * @param size size of the matrix * @param num_nonzeros number of nonzeros */ - Coo(std::shared_ptr exec, const dim<2> &size = dim<2>{}, + Coo(std::shared_ptr exec, const dim<2>& size = dim<2>{}, size_type num_nonzeros = {}) : EnableLinOp(exec, size), values_(exec, num_nonzeros), @@ -296,8 +296,8 @@ class Coo : public EnableLinOp>, */ template - Coo(std::shared_ptr exec, const dim<2> &size, - ValuesArray &&values, ColIdxsArray &&col_idxs, RowIdxsArray &&row_idxs) + Coo(std::shared_ptr exec, const dim<2>& size, + ValuesArray&& values, ColIdxsArray&& col_idxs, RowIdxsArray&& row_idxs) : EnableLinOp(exec, 
size), values_{exec, std::forward(values)}, col_idxs_{exec, std::forward(col_idxs)}, @@ -307,14 +307,14 @@ class Coo : public EnableLinOp>, GKO_ASSERT_EQ(values_.get_num_elems(), row_idxs_.get_num_elems()); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; - void apply2_impl(const LinOp *b, LinOp *x) const; + void apply2_impl(const LinOp* b, LinOp* x) const; - void apply2_impl(const LinOp *alpha, const LinOp *b, LinOp *x) const; + void apply2_impl(const LinOp* alpha, const LinOp* b, LinOp* x) const; private: Array values_; diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 7bf13f459fd..48c3f9ef494 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -72,7 +72,7 @@ namespace detail { template -void strategy_rebuild_helper(Csr *result); +void strategy_rebuild_helper(Csr* result); } // namespace detail @@ -185,8 +185,8 @@ class Csr : public EnableLinOp>, * @param mtx_row_ptrs the row pointers of the matrix * @param mtx_srow the srow of the matrix */ - virtual void process(const Array &mtx_row_ptrs, - Array *mtx_srow) = 0; + virtual void process(const Array& mtx_row_ptrs, + Array* mtx_srow) = 0; /** * Computes the srow size according to the number of nonzeros. 
@@ -223,14 +223,14 @@ class Csr : public EnableLinOp>, */ classical() : strategy_type("classical"), max_length_per_row_(0) {} - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override + void process(const Array& mtx_row_ptrs, + Array* mtx_srow) override { auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master(); Array row_ptrs_host(host_mtx_exec); const bool is_mtx_on_host{host_mtx_exec == mtx_row_ptrs.get_executor()}; - const index_type *row_ptrs{}; + const index_type* row_ptrs{}; if (is_mtx_on_host) { row_ptrs = mtx_row_ptrs.get_const_data(); } else { @@ -273,8 +273,8 @@ class Csr : public EnableLinOp>, */ merge_path() : strategy_type("merge_path") {} - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override + void process(const Array& mtx_row_ptrs, + Array* mtx_srow) override {} int64_t clac_size(const int64_t nnz) override { return 0; } @@ -298,8 +298,8 @@ class Csr : public EnableLinOp>, */ cusparse() : strategy_type("cusparse") {} - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override + void process(const Array& mtx_row_ptrs, + Array* mtx_srow) override {} int64_t clac_size(const int64_t nnz) override { return 0; } @@ -322,8 +322,8 @@ class Csr : public EnableLinOp>, */ sparselib() : strategy_type("sparselib") {} - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override + void process(const Array& mtx_row_ptrs, + Array* mtx_srow) override {} int64_t clac_size(const int64_t nnz) override { return 0; } @@ -399,8 +399,8 @@ class Csr : public EnableLinOp>, strategy_name_(strategy_name) {} - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override + void process(const Array& mtx_row_ptrs, + Array* mtx_srow) override { auto nwarps = mtx_srow->get_num_elems(); @@ -413,8 +413,8 @@ class Csr : public EnableLinOp>, mtx_row_ptrs.get_executor()}; Array row_ptrs_host(host_mtx_exec); Array srow_host(host_srow_exec); - const index_type *row_ptrs{}; - index_type *srow{}; + const index_type* row_ptrs{}; + 
index_type* srow{}; if (is_srow_on_host) { srow = mtx_srow->get_data(); } else { @@ -579,8 +579,8 @@ class Csr : public EnableLinOp>, max_length_per_row_(0) {} - void process(const Array &mtx_row_ptrs, - Array *mtx_srow) override + void process(const Array& mtx_row_ptrs, + Array* mtx_srow) override { // if the number of stored elements is larger than or // the maximum number of stored elements per row is larger than @@ -605,7 +605,7 @@ class Csr : public EnableLinOp>, const bool is_mtx_on_host{host_mtx_exec == mtx_row_ptrs.get_executor()}; Array row_ptrs_host(host_mtx_exec); - const index_type *row_ptrs{}; + const index_type* row_ptrs{}; if (is_mtx_on_host) { row_ptrs = mtx_row_ptrs.get_const_data(); } else { @@ -678,7 +678,7 @@ class Csr : public EnableLinOp>, index_type max_length_per_row_; }; - void convert_to(Csr *result) const override + void convert_to(Csr* result) const override { bool same_executor = this->get_executor() == result->get_executor(); // NOTE: as soon as strategies are improved, this can be reverted @@ -695,7 +695,7 @@ class Csr : public EnableLinOp>, // END NOTE } - void move_to(Csr *result) override + void move_to(Csr* result) override { bool same_executor = this->get_executor() == result->get_executor(); EnableLinOp::move_to(result); @@ -707,59 +707,59 @@ class Csr : public EnableLinOp>, friend class Csr, IndexType>; void convert_to( - Csr, IndexType> *result) const override; + Csr, IndexType>* result) const override; - void move_to(Csr, IndexType> *result) override; + void move_to(Csr, IndexType>* result) override; - void convert_to(Dense *other) const override; + void convert_to(Dense* other) const override; - void move_to(Dense *other) override; + void move_to(Dense* other) override; - void convert_to(Coo *result) const override; + void convert_to(Coo* result) const override; - void move_to(Coo *result) override; + void move_to(Coo* result) override; - void convert_to(Ell *result) const override; + void convert_to(Ell* result) const 
override; - void move_to(Ell *result) override; + void move_to(Ell* result) override; - void convert_to(Hybrid *result) const override; + void convert_to(Hybrid* result) const override; - void move_to(Hybrid *result) override; + void move_to(Hybrid* result) override; - void convert_to(Sellp *result) const override; + void convert_to(Sellp* result) const override; - void move_to(Sellp *result) override; + void move_to(Sellp* result) override; - void convert_to(SparsityCsr *result) const override; + void convert_to(SparsityCsr* result) const override; - void move_to(SparsityCsr *result) override; + void move_to(SparsityCsr* result) override; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr transpose() const override; std::unique_ptr conj_transpose() const override; std::unique_ptr permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr inverse_permute( - const Array *inverse_permutation_indices) const override; + const Array* inverse_permutation_indices) const override; std::unique_ptr row_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr column_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr inverse_row_permute( - const Array *inverse_permutation_indices) const override; + const Array* inverse_permutation_indices) const override; std::unique_ptr inverse_column_permute( - const Array *inverse_permutation_indices) const override; + const Array* inverse_permutation_indices) const override; std::unique_ptr> extract_diagonal() const override; @@ -785,7 +785,7 @@ class Csr : public EnableLinOp>, * * @return the values of the matrix. 
*/ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc Csr::get_values() @@ -794,7 +794,7 @@ class Csr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -804,7 +804,7 @@ class Csr : public EnableLinOp>, * * @return the column indexes of the matrix. */ - index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } /** * @copydoc Csr::get_col_idxs() @@ -813,7 +813,7 @@ class Csr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_col_idxs() const noexcept + const index_type* get_const_col_idxs() const noexcept { return col_idxs_.get_const_data(); } @@ -823,7 +823,7 @@ class Csr : public EnableLinOp>, * * @return the row pointers of the matrix. */ - index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } + index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); } /** * @copydoc Csr::get_row_ptrs() @@ -832,7 +832,7 @@ class Csr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_row_ptrs() const noexcept + const index_type* get_const_row_ptrs() const noexcept { return row_ptrs_.get_const_data(); } @@ -842,7 +842,7 @@ class Csr : public EnableLinOp>, * * @return the starting rows. 
*/ - index_type *get_srow() noexcept { return srow_.get_data(); } + index_type* get_srow() noexcept { return srow_.get_data(); } /** * @copydoc Csr::get_srow() @@ -851,7 +851,7 @@ class Csr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_srow() const noexcept + const index_type* get_const_srow() const noexcept { return srow_.get_const_data(); } @@ -916,7 +916,7 @@ class Csr : public EnableLinOp>, * @param num_nonzeros number of nonzeros * @param strategy the strategy of CSR */ - Csr(std::shared_ptr exec, const dim<2> &size = dim<2>{}, + Csr(std::shared_ptr exec, const dim<2>& size = dim<2>{}, size_type num_nonzeros = {}, std::shared_ptr strategy = std::make_shared()) : EnableLinOp(exec, size), @@ -949,8 +949,8 @@ class Csr : public EnableLinOp>, */ template - Csr(std::shared_ptr exec, const dim<2> &size, - ValuesArray &&values, ColIdxsArray &&col_idxs, RowPtrsArray &&row_ptrs, + Csr(std::shared_ptr exec, const dim<2>& size, + ValuesArray&& values, ColIdxsArray&& col_idxs, RowPtrsArray&& row_ptrs, std::shared_ptr strategy = std::make_shared()) : EnableLinOp(exec, size), values_{exec, std::forward(values)}, @@ -964,24 +964,24 @@ class Csr : public EnableLinOp>, this->make_srow(); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; // TODO clean this up as soon as we improve strategy_type template - void convert_strategy_helper(CsrType *result) const + void convert_strategy_helper(CsrType* result) const { auto strat = this->get_strategy().get(); std::shared_ptr new_strat; - if (dynamic_cast(strat)) { + if (dynamic_cast(strat)) { new_strat = std::make_shared(); - } else if (dynamic_cast(strat)) 
{ + } else if (dynamic_cast(strat)) { new_strat = std::make_shared(); - } else if (dynamic_cast(strat)) { + } else if (dynamic_cast(strat)) { new_strat = std::make_shared(); - } else if (dynamic_cast(strat)) { + } else if (dynamic_cast(strat)) { new_strat = std::make_shared(); } else { auto rexec = result->get_executor(); @@ -990,7 +990,7 @@ class Csr : public EnableLinOp>, auto hip_exec = std::dynamic_pointer_cast(rexec); auto dpcpp_exec = std::dynamic_pointer_cast(rexec); - auto lb = dynamic_cast(strat); + auto lb = dynamic_cast(strat); if (cuda_exec) { if (lb) { new_strat = @@ -1105,7 +1105,7 @@ namespace detail { * @param result the csr matrix. */ template -void strategy_rebuild_helper(Csr *result) +void strategy_rebuild_helper(Csr* result) { using load_balance = typename Csr::load_balance; using automatical = typename Csr::automatical; diff --git a/include/ginkgo/core/matrix/dense.hpp b/include/ginkgo/core/matrix/dense.hpp index 9ec0a4b8692..4f733534022 100644 --- a/include/ginkgo/core/matrix/dense.hpp +++ b/include/ginkgo/core/matrix/dense.hpp @@ -152,7 +152,7 @@ class Dense * * @param other The other matrix whose configuration needs to copied. */ - static std::unique_ptr create_with_config_of(const Dense *other) + static std::unique_ptr create_with_config_of(const Dense* other) { // De-referencing `other` before calling the functions (instead of // using operator `->`) is currently required to be compatible with @@ -173,8 +173,8 @@ class Dense * @returns a Dense matrix with the type of other. */ static std::unique_ptr create_with_type_of( - const Dense *other, std::shared_ptr exec, - const dim<2> &size = dim<2>{}) + const Dense* other, std::shared_ptr exec, + const dim<2>& size = dim<2>{}) { // See create_with_config_of() return (*other).create_with_type_of_impl(exec, size, size[1]); @@ -189,8 +189,8 @@ class Dense * @note This is an overload which allows full parameter specification. 
*/ static std::unique_ptr create_with_type_of( - const Dense *other, std::shared_ptr exec, - const dim<2> &size, size_type stride) + const Dense* other, std::shared_ptr exec, + const dim<2>& size, size_type stride) { // See create_with_config_of() return (*other).create_with_type_of_impl(exec, size, stride); @@ -198,69 +198,69 @@ class Dense friend class Dense>; - void convert_to(Dense *result) const override; + void convert_to(Dense* result) const override; - void move_to(Dense *result) override; + void move_to(Dense* result) override; - void convert_to(Dense> *result) const override; + void convert_to(Dense>* result) const override; - void move_to(Dense> *result) override; + void move_to(Dense>* result) override; - void convert_to(Coo *result) const override; + void convert_to(Coo* result) const override; - void move_to(Coo *result) override; + void move_to(Coo* result) override; - void convert_to(Coo *result) const override; + void convert_to(Coo* result) const override; - void move_to(Coo *result) override; + void move_to(Coo* result) override; - void convert_to(Csr *result) const override; + void convert_to(Csr* result) const override; - void move_to(Csr *result) override; + void move_to(Csr* result) override; - void convert_to(Csr *result) const override; + void convert_to(Csr* result) const override; - void move_to(Csr *result) override; + void move_to(Csr* result) override; - void convert_to(Ell *result) const override; + void convert_to(Ell* result) const override; - void move_to(Ell *result) override; + void move_to(Ell* result) override; - void convert_to(Ell *result) const override; + void convert_to(Ell* result) const override; - void move_to(Ell *result) override; + void move_to(Ell* result) override; - void convert_to(Hybrid *result) const override; + void convert_to(Hybrid* result) const override; - void move_to(Hybrid *result) override; + void move_to(Hybrid* result) override; - void convert_to(Hybrid *result) const override; + void 
convert_to(Hybrid* result) const override; - void move_to(Hybrid *result) override; + void move_to(Hybrid* result) override; - void convert_to(Sellp *result) const override; + void convert_to(Sellp* result) const override; - void move_to(Sellp *result) override; + void move_to(Sellp* result) override; - void convert_to(Sellp *result) const override; + void convert_to(Sellp* result) const override; - void move_to(Sellp *result) override; + void move_to(Sellp* result) override; - void convert_to(SparsityCsr *result) const override; + void convert_to(SparsityCsr* result) const override; - void move_to(SparsityCsr *result) override; + void move_to(SparsityCsr* result) override; - void convert_to(SparsityCsr *result) const override; + void convert_to(SparsityCsr* result) const override; - void move_to(SparsityCsr *result) override; + void move_to(SparsityCsr* result) override; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void read(const mat_data32 &data) override; + void read(const mat_data32& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; - void write(mat_data32 &data) const override; + void write(mat_data32& data) const override; std::unique_ptr transpose() const override; @@ -272,7 +272,7 @@ class Dense * @param output The output matrix. It must have the dimensions * `gko::transpose(this->get_size())` */ - void transpose(Dense *output) const; + void transpose(Dense* output) const; /** * Writes the conjugate-transposed matrix into the given output matrix. @@ -280,7 +280,7 @@ class Dense * @param output The output matrix. It must have the dimensions * `gko::transpose(this->get_size())` */ - void conj_transpose(Dense *output) const; + void conj_transpose(Dense* output) const; /** * Fill the dense matrix with a given value. 
@@ -290,10 +290,10 @@ class Dense void fill(const ValueType value); std::unique_ptr permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; /** * Writes the symmetrically permuted matrix into the given output matrix. @@ -304,18 +304,18 @@ class Dense * `this->get_size()` * @see Dense::permute(const Array*) */ - void permute(const Array *permutation_indices, Dense *output) const; + void permute(const Array* permutation_indices, Dense* output) const; /** * @copydoc Dense::permute(const Array*, Dense*) */ - void permute(const Array *permutation_indices, Dense *output) const; + void permute(const Array* permutation_indices, Dense* output) const; std::unique_ptr inverse_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr inverse_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; /** * Writes the inverse symmetrically permuted matrix into the given output @@ -327,20 +327,20 @@ class Dense * `this->get_size()` * @see Dense::inverse_permute(const Array*) */ - void inverse_permute(const Array *permutation_indices, - Dense *output) const; + void inverse_permute(const Array* permutation_indices, + Dense* output) const; /** * @copydoc Dense::inverse_permute(const Array*, Dense*) */ - void inverse_permute(const Array *permutation_indices, - Dense *output) const; + void inverse_permute(const Array* permutation_indices, + Dense* output) const; std::unique_ptr row_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr row_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; /** * Writes the row-permuted matrix into the given 
output matrix. @@ -351,14 +351,14 @@ class Dense * `this->get_size()` * @see Dense::row_permute(const Array*) */ - void row_permute(const Array *permutation_indices, - Dense *output) const; + void row_permute(const Array* permutation_indices, + Dense* output) const; /** * @copydoc Dense::row_permute(const Array*, Dense*) */ - void row_permute(const Array *permutation_indices, - Dense *output) const; + void row_permute(const Array* permutation_indices, + Dense* output) const; /** * Create a Dense matrix consisting of the given rows from this matrix. @@ -370,12 +370,12 @@ class Dense * the gathered rows from this matrix: * `output(i,j) = input(gather_indices(i), j)` */ - std::unique_ptr row_gather(const Array *gather_indices) const; + std::unique_ptr row_gather(const Array* gather_indices) const; /** * @copydoc row_gather(const Array*) const */ - std::unique_ptr row_gather(const Array *gather_indices) const; + std::unique_ptr row_gather(const Array* gather_indices) const; /** * Copies the given rows from this matrix into `row_gathered` @@ -388,20 +388,20 @@ class Dense * It must have the same number of columns as this * matrix and `gather_indices->get_num_elems()` rows. */ - void row_gather(const Array *gather_indices, - Dense *row_gathered) const; + void row_gather(const Array* gather_indices, + Dense* row_gathered) const; /** * @copydoc row_gather(const Array*, Dense*) const */ - void row_gather(const Array *gather_indices, - Dense *row_gathered) const; + void row_gather(const Array* gather_indices, + Dense* row_gathered) const; std::unique_ptr column_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr column_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; /** * Writes the column-permuted matrix into the given output matrix. 
@@ -412,20 +412,20 @@ class Dense * `this->get_size()` * @see Dense::column_permute(const Array*) */ - void column_permute(const Array *permutation_indices, - Dense *output) const; + void column_permute(const Array* permutation_indices, + Dense* output) const; /** * @copydoc Dense::column_permute(const Array*, Dense*) */ - void column_permute(const Array *permutation_indices, - Dense *output) const; + void column_permute(const Array* permutation_indices, + Dense* output) const; std::unique_ptr inverse_row_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr inverse_row_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; /** * Writes the inverse row-permuted matrix into the given output matrix. @@ -436,20 +436,20 @@ class Dense * `this->get_size()` * @see Dense::inverse_row_permute(const Array*) */ - void inverse_row_permute(const Array *permutation_indices, - Dense *output) const; + void inverse_row_permute(const Array* permutation_indices, + Dense* output) const; /** * @copydoc Dense::inverse_row_permute(const Array*, Dense*) */ - void inverse_row_permute(const Array *permutation_indices, - Dense *output) const; + void inverse_row_permute(const Array* permutation_indices, + Dense* output) const; std::unique_ptr inverse_column_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; std::unique_ptr inverse_column_permute( - const Array *permutation_indices) const override; + const Array* permutation_indices) const override; /** * Writes the inverse column-permuted matrix into the given output matrix. 
@@ -460,14 +460,14 @@ class Dense * `this->get_size()` * @see Dense::inverse_column_permute(const Array*) */ - void inverse_column_permute(const Array *permutation_indices, - Dense *output) const; + void inverse_column_permute(const Array* permutation_indices, + Dense* output) const; /** * @copydoc Dense::inverse_column_permute(const Array*, Dense*) */ - void inverse_column_permute(const Array *permutation_indices, - Dense *output) const; + void inverse_column_permute(const Array* permutation_indices, + Dense* output) const; std::unique_ptr> extract_diagonal() const override; @@ -478,7 +478,7 @@ class Dense * matrix's diagonal. * @see Dense::extract_diagonal() */ - void extract_diagonal(Diagonal *output) const; + void extract_diagonal(Diagonal* output) const; std::unique_ptr compute_absolute() const override; @@ -489,7 +489,7 @@ class Dense * matrix. * @see Dense::compute_absolute() */ - void compute_absolute(absolute_type *output) const; + void compute_absolute(absolute_type* output) const; void compute_absolute_inplace() override; @@ -504,7 +504,7 @@ class Dense * If the original matrix was real, the imaginary part of the result will * be zero. */ - void make_complex(complex_type *result) const; + void make_complex(complex_type* result) const; /** * Creates a new real matrix and extracts the real part of the original @@ -515,7 +515,7 @@ class Dense /** * Extracts the real part of the original matrix into a given real matrix. */ - void get_real(real_type *result) const; + void get_real(real_type* result) const; /** * Creates a new real matrix and extracts the imaginary part of the @@ -527,14 +527,14 @@ class Dense * Extracts the imaginary part of the original matrix into a given real * matrix. */ - void get_imag(real_type *result) const; + void get_imag(real_type* result) const; /** * Returns a pointer to the array of values of the matrix. 
* * @return the pointer to the array of values */ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc get_values() @@ -543,7 +543,7 @@ class Dense * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -575,7 +575,7 @@ class Dense * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - value_type &at(size_type row, size_type col) noexcept + value_type& at(size_type row, size_type col) noexcept { return values_.get_data()[linearize_index(row, col)]; } @@ -602,7 +602,7 @@ class Dense * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - ValueType &at(size_type idx) noexcept + ValueType& at(size_type idx) noexcept { return values_.get_data()[linearize_index(idx)]; } @@ -624,7 +624,7 @@ class Dense * element of alpha (the number of columns of alpha has to * match the number of columns of the matrix). */ - void scale(const LinOp *alpha) + void scale(const LinOp* alpha) { auto exec = this->get_executor(); this->scale_impl(make_temporary_clone(exec, alpha).get()); @@ -639,7 +639,7 @@ class Dense * of the i-th element of alpha (the number of columns of * alpha has to match the number of columns of the matrix). */ - void inv_scale(const LinOp *alpha) + void inv_scale(const LinOp* alpha) { auto exec = this->get_executor(); this->inv_scale_impl(make_temporary_clone(exec, alpha).get()); @@ -655,7 +655,7 @@ class Dense * match the number of columns of the matrix). 
* @param b a matrix of the same dimension as this */ - void add_scaled(const LinOp *alpha, const LinOp *b) + void add_scaled(const LinOp* alpha, const LinOp* b) { auto exec = this->get_executor(); this->add_scaled_impl(make_temporary_clone(exec, alpha).get(), @@ -672,7 +672,7 @@ class Dense * match the number of columns of the matrix). * @param b a matrix of the same dimension as this */ - void sub_scaled(const LinOp *alpha, const LinOp *b) + void sub_scaled(const LinOp* alpha, const LinOp* b) { auto exec = this->get_executor(); this->sub_scaled_impl(make_temporary_clone(exec, alpha).get(), @@ -687,7 +687,7 @@ class Dense * (the number of column in the vector must match the number * of columns of this) */ - void compute_dot(const LinOp *b, LinOp *result) const + void compute_dot(const LinOp* b, LinOp* result) const { auto exec = this->get_executor(); this->compute_dot_impl(make_temporary_clone(exec, b).get(), @@ -702,7 +702,7 @@ class Dense * (the number of column in the vector must match the number * of columns of this) */ - void compute_conj_dot(const LinOp *b, LinOp *result) const + void compute_conj_dot(const LinOp* b, LinOp* result) const { auto exec = this->get_executor(); this->compute_conj_dot_impl( @@ -717,7 +717,7 @@ class Dense * (the number of columns in the vector must match the number * of columns of this) */ - void compute_norm2(LinOp *result) const + void compute_norm2(LinOp* result) const { auto exec = this->get_executor(); this->compute_norm2_impl( @@ -734,8 +734,8 @@ class Dense * @param columns column span * @param stride stride of the new submatrix. 
*/ - std::unique_ptr create_submatrix(const span &rows, - const span &columns, + std::unique_ptr create_submatrix(const span& rows, + const span& columns, const size_type stride) { return this->create_submatrix_impl(rows, columns, stride); @@ -747,8 +747,8 @@ class Dense * @param rows row span * @param columns column span */ - std::unique_ptr create_submatrix(const span &rows, - const span &columns) + std::unique_ptr create_submatrix(const span& rows, + const span& columns) { return create_submatrix(rows, columns, this->get_stride()); } @@ -773,7 +773,7 @@ class Dense this->get_executor(), dim<2>{num_rows, num_cols}, Array>::view( this->get_executor(), num_rows * stride, - reinterpret_cast *>( + reinterpret_cast*>( this->get_values())), stride); } @@ -795,8 +795,8 @@ class Dense this->get_executor(), dim<2>{num_rows, num_cols}, Array>::view( this->get_executor(), num_rows * stride, - const_cast *>( - reinterpret_cast *>( + const_cast*>( + reinterpret_cast*>( this->get_const_values()))), stride); } @@ -808,7 +808,7 @@ class Dense * @param exec Executor associated to the matrix * @param size size of the matrix */ - Dense(std::shared_ptr exec, const dim<2> &size = dim<2>{}) + Dense(std::shared_ptr exec, const dim<2>& size = dim<2>{}) : Dense(std::move(exec), size, size[1]) {} @@ -821,7 +821,7 @@ class Dense * elements of two consecutive rows, expressed as the * number of matrix elements) */ - Dense(std::shared_ptr exec, const dim<2> &size, + Dense(std::shared_ptr exec, const dim<2>& size, size_type stride) : EnableLinOp(exec, size), values_(exec, size[0] * stride), @@ -845,8 +845,8 @@ class Dense * original array data will not be used in the matrix. 
*/ template - Dense(std::shared_ptr exec, const dim<2> &size, - ValuesArray &&values, size_type stride) + Dense(std::shared_ptr exec, const dim<2>& size, + ValuesArray&& values, size_type stride) : EnableLinOp(exec, size), values_{exec, std::forward(values)}, stride_{stride} @@ -877,7 +877,7 @@ class Dense * @returns a Dense matrix with the same type as the caller. */ virtual std::unique_ptr create_with_type_of_impl( - std::shared_ptr exec, const dim<2> &size, + std::shared_ptr exec, const dim<2>& size, size_type stride) const { return Dense::create(exec, size, stride); @@ -889,7 +889,7 @@ class Dense * @note Other implementations of dense should override this function * instead of scale(const LinOp *alpha). */ - virtual void scale_impl(const LinOp *alpha); + virtual void scale_impl(const LinOp* alpha); /** * @copydoc inv_scale(const LinOp *) @@ -897,7 +897,7 @@ class Dense * @note Other implementations of dense should override this function * instead of inv_scale(const LinOp *alpha). */ - virtual void inv_scale_impl(const LinOp *alpha); + virtual void inv_scale_impl(const LinOp* alpha); /** * @copydoc add_scaled(const LinOp *, const LinOp *) @@ -905,7 +905,7 @@ class Dense * @note Other implementations of dense should override this function * instead of add_scale(const LinOp *alpha, const LinOp *b). */ - virtual void add_scaled_impl(const LinOp *alpha, const LinOp *b); + virtual void add_scaled_impl(const LinOp* alpha, const LinOp* b); /** * @copydoc sub_scaled(const LinOp *, const LinOp *) @@ -913,7 +913,7 @@ class Dense * @note Other implementations of dense should override this function * instead of sub_scale(const LinOp *alpha, const LinOp *b). 
*/ - virtual void sub_scaled_impl(const LinOp *alpha, const LinOp *b); + virtual void sub_scaled_impl(const LinOp* alpha, const LinOp* b); /** * @copydoc compute_dot(const LinOp *, LinOp *) const @@ -921,7 +921,7 @@ class Dense * @note Other implementations of dense should override this function * instead of compute_dot(const LinOp *b, LinOp *result). */ - virtual void compute_dot_impl(const LinOp *b, LinOp *result) const; + virtual void compute_dot_impl(const LinOp* b, LinOp* result) const; /** * @copydoc compute_conj_dot(const LinOp *, LinOp *) const @@ -929,7 +929,7 @@ class Dense * @note Other implementations of dense should override this function * instead of compute_conj_dot(const LinOp *b, LinOp *result). */ - virtual void compute_conj_dot_impl(const LinOp *b, LinOp *result) const; + virtual void compute_conj_dot_impl(const LinOp* b, LinOp* result) const; /** * @copydoc compute_norm2(LinOp *) const @@ -937,7 +937,7 @@ class Dense * @note Other implementations of dense should override this function * instead of compute_norm2(LinOp *result). */ - virtual void compute_norm2_impl(LinOp *result) const; + virtual void compute_norm2_impl(LinOp* result) const; /** * @copydoc create_submatrix(const span, const span, const size_type) @@ -946,8 +946,8 @@ class Dense * instead of create_submatrix(const span, const span, const * size_type). 
*/ - virtual std::unique_ptr create_submatrix_impl(const span &rows, - const span &columns, + virtual std::unique_ptr create_submatrix_impl(const span& rows, + const span& columns, const size_type stride) { row_major_range range_this{this->get_values(), this->get_size()[0], @@ -965,10 +965,10 @@ class Dense stride); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; size_type linearize_index(size_type row, size_type col) const noexcept { @@ -982,31 +982,31 @@ class Dense } template - void permute_impl(const Array *permutation, Dense *output) const; + void permute_impl(const Array* permutation, Dense* output) const; template - void inverse_permute_impl(const Array *permutation, - Dense *output) const; + void inverse_permute_impl(const Array* permutation, + Dense* output) const; template - void row_permute_impl(const Array *permutation, - Dense *output) const; + void row_permute_impl(const Array* permutation, + Dense* output) const; template - void inverse_row_permute_impl(const Array *permutation, - Dense *output) const; + void inverse_row_permute_impl(const Array* permutation, + Dense* output) const; template - void row_gather_impl(const Array *row_indices, - Dense *output) const; + void row_gather_impl(const Array* row_indices, + Dense* output) const; template - void column_permute_impl(const Array *permutation, - Dense *output) const; + void column_permute_impl(const Array* permutation, + Dense* output) const; template - void inverse_column_permute_impl(const Array *permutation, - Dense *output) const; + void inverse_column_permute_impl(const Array* permutation, + Dense* output) const; private: Array values_; @@ -1023,7 +1023,7 @@ namespace detail { template struct temporary_clone_helper> { 
static std::unique_ptr> create( - std::shared_ptr exec, matrix::Dense *ptr, + std::shared_ptr exec, matrix::Dense* ptr, bool copy_data) { if (copy_data) { @@ -1062,13 +1062,13 @@ struct temporary_clone_helper> { template std::unique_ptr initialize( size_type stride, std::initializer_list vals, - std::shared_ptr exec, TArgs &&... create_args) + std::shared_ptr exec, TArgs&&... create_args) { using dense = matrix::Dense; size_type num_rows = vals.size(); auto tmp = dense::create(exec->get_master(), dim<2>{num_rows, 1}, stride); size_type idx = 0; - for (const auto &elem : vals) { + for (const auto& elem : vals) { tmp->at(idx) = elem; ++idx; } @@ -1101,7 +1101,7 @@ std::unique_ptr initialize( template std::unique_ptr initialize( std::initializer_list vals, - std::shared_ptr exec, TArgs &&... create_args) + std::shared_ptr exec, TArgs&&... create_args) { return initialize(1, vals, std::move(exec), std::forward(create_args)...); @@ -1134,7 +1134,7 @@ std::unique_ptr initialize( size_type stride, std::initializer_list> vals, - std::shared_ptr exec, TArgs &&... create_args) + std::shared_ptr exec, TArgs&&... create_args) { using dense = matrix::Dense; size_type num_rows = vals.size(); @@ -1142,9 +1142,9 @@ std::unique_ptr initialize( auto tmp = dense::create(exec->get_master(), dim<2>{num_rows, num_cols}, stride); size_type ridx = 0; - for (const auto &row : vals) { + for (const auto& row : vals) { size_type cidx = 0; - for (const auto &elem : row) { + for (const auto& elem : row) { tmp->at(ridx, cidx) = elem; ++cidx; } @@ -1182,7 +1182,7 @@ template std::unique_ptr initialize( std::initializer_list> vals, - std::shared_ptr exec, TArgs &&... create_args) + std::shared_ptr exec, TArgs&&... create_args) { return initialize(vals.size() > 0 ? 
begin(vals)->size() : 0, vals, std::move(exec), diff --git a/include/ginkgo/core/matrix/diagonal.hpp b/include/ginkgo/core/matrix/diagonal.hpp index 4215a08cad6..c99d4c440f0 100644 --- a/include/ginkgo/core/matrix/diagonal.hpp +++ b/include/ginkgo/core/matrix/diagonal.hpp @@ -99,17 +99,17 @@ class Diagonal std::unique_ptr conj_transpose() const override; - void convert_to(Diagonal> *result) const override; + void convert_to(Diagonal>* result) const override; - void move_to(Diagonal> *result) override; + void move_to(Diagonal>* result) override; - void convert_to(Csr *result) const override; + void convert_to(Csr* result) const override; - void move_to(Csr *result) override; + void move_to(Csr* result) override; - void convert_to(Csr *result) const override; + void convert_to(Csr* result) const override; - void move_to(Csr *result) override; + void move_to(Csr* result) override; std::unique_ptr compute_absolute() const override; @@ -120,7 +120,7 @@ class Diagonal * * @return the pointer to the array of values */ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc get_values() @@ -129,7 +129,7 @@ class Diagonal * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -141,7 +141,7 @@ class Diagonal * @param b the input vector(s) on which the diagonal matrix is applied * @param x the output vector(s) where the result is stored */ - void rapply(const LinOp *b, LinOp *x) const + void rapply(const LinOp* b, LinOp* x) const { GKO_ASSERT_REVERSE_CONFORMANT(this, b); GKO_ASSERT_EQUAL_ROWS(b, x); @@ -150,13 +150,13 @@ class Diagonal this->rapply_impl(b, x); } - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void read(const mat_data32 &data) override; + void read(const mat_data32& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; - void write(mat_data32 &data) const override; + void write(mat_data32& data) const override; protected: @@ -195,19 +195,19 @@ class Diagonal */ template Diagonal(std::shared_ptr exec, const size_type size, - ValuesArray &&values) + ValuesArray&& values) : EnableLinOp(exec, dim<2>(size)), values_{exec, std::forward(values)} { GKO_ENSURE_IN_BOUNDS(size - 1, values_.get_num_elems()); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; - void rapply_impl(const LinOp *b, LinOp *x) const; + void rapply_impl(const LinOp* b, LinOp* x) const; private: diff --git a/include/ginkgo/core/matrix/ell.hpp b/include/ginkgo/core/matrix/ell.hpp index ea24770fbea..f20a0c786d3 100644 --- a/include/ginkgo/core/matrix/ell.hpp +++ b/include/ginkgo/core/matrix/ell.hpp @@ -97,21 +97,21 @@ class Ell : public EnableLinOp>, friend class Ell, IndexType>; void convert_to( - Ell, IndexType> *result) const override; + Ell, 
IndexType>* result) const override; - void move_to(Ell, IndexType> *result) override; + void move_to(Ell, IndexType>* result) override; - void convert_to(Dense *other) const override; + void convert_to(Dense* other) const override; - void move_to(Dense *other) override; + void move_to(Dense* other) override; - void convert_to(Csr *other) const override; + void convert_to(Csr* other) const override; - void move_to(Csr *other) override; + void move_to(Csr* other) override; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr> extract_diagonal() const override; @@ -124,7 +124,7 @@ class Ell : public EnableLinOp>, * * @return the values of the matrix. */ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc Ell::get_values() @@ -133,7 +133,7 @@ class Ell : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -143,7 +143,7 @@ class Ell : public EnableLinOp>, * * @return the column indexes of the matrix. */ - index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } /** * @copydoc Ell::get_col_idxs() @@ -152,7 +152,7 @@ class Ell : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_col_idxs() const noexcept + const index_type* get_const_col_idxs() const noexcept { return col_idxs_.get_const_data(); } @@ -194,7 +194,7 @@ class Ell : public EnableLinOp>, * stored at (e.g. 
trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - value_type &val_at(size_type row, size_type idx) noexcept + value_type& val_at(size_type row, size_type idx) noexcept { return values_.get_data()[this->linearize_index(row, idx)]; } @@ -217,7 +217,7 @@ class Ell : public EnableLinOp>, * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - index_type &col_at(size_type row, size_type idx) noexcept + index_type& col_at(size_type row, size_type idx) noexcept { return this->get_col_idxs()[this->linearize_index(row, idx)]; } @@ -240,7 +240,7 @@ class Ell : public EnableLinOp>, * @param exec Executor associated to the matrix * @param size size of the matrix */ - Ell(std::shared_ptr exec, const dim<2> &size = dim<2>{}) + Ell(std::shared_ptr exec, const dim<2>& size = dim<2>{}) : Ell(std::move(exec), size, size[1]) {} @@ -253,7 +253,7 @@ class Ell : public EnableLinOp>, * @param num_stored_elements_per_row the number of stored elements per * row */ - Ell(std::shared_ptr exec, const dim<2> &size, + Ell(std::shared_ptr exec, const dim<2>& size, size_type num_stored_elements_per_row) : Ell(std::move(exec), size, num_stored_elements_per_row, size[0]) {} @@ -267,7 +267,7 @@ class Ell : public EnableLinOp>, * row * @param stride stride of the rows */ - Ell(std::shared_ptr exec, const dim<2> &size, + Ell(std::shared_ptr exec, const dim<2>& size, size_type num_stored_elements_per_row, size_type stride) : EnableLinOp(exec, size), values_(exec, stride * num_stored_elements_per_row), @@ -298,8 +298,8 @@ class Ell : public EnableLinOp>, * array data will not be used in the matrix. 
*/ template - Ell(std::shared_ptr exec, const dim<2> &size, - ValuesArray &&values, ColIdxsArray &&col_idxs, + Ell(std::shared_ptr exec, const dim<2>& size, + ValuesArray&& values, ColIdxsArray&& col_idxs, size_type num_stored_elements_per_row, size_type stride) : EnableLinOp(exec, size), values_{exec, std::forward(values)}, @@ -313,10 +313,10 @@ class Ell : public EnableLinOp>, col_idxs_.get_num_elems()); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; size_type linearize_index(size_type row, size_type col) const noexcept { diff --git a/include/ginkgo/core/matrix/fbcsr.hpp b/include/ginkgo/core/matrix/fbcsr.hpp index 38cac854bc5..11c6d29001f 100644 --- a/include/ginkgo/core/matrix/fbcsr.hpp +++ b/include/ginkgo/core/matrix/fbcsr.hpp @@ -166,13 +166,13 @@ class Fbcsr : public EnableLinOp>, friend class Fbcsr, IndexType>; void convert_to( - Fbcsr, IndexType> *result) const override; + Fbcsr, IndexType>* result) const override; - void move_to(Fbcsr, IndexType> *result) override; + void move_to(Fbcsr, IndexType>* result) override; - void convert_to(Dense *other) const override; + void convert_to(Dense* other) const override; - void move_to(Dense *other) override; + void move_to(Dense* other) override; /** * Converts the matrix to CSR format @@ -180,9 +180,9 @@ class Fbcsr : public EnableLinOp>, * @note Any explicit zeros in the original matrix are retained * in the converted result. 
*/ - void convert_to(Csr *result) const override; + void convert_to(Csr* result) const override; - void move_to(Csr *result) override; + void move_to(Csr* result) override; /** * Get the block sparsity pattern in CSR-like format @@ -190,9 +190,9 @@ class Fbcsr : public EnableLinOp>, * @note The actual non-zero values are never copied; * the result always has a value array of size 1 with the value 1. */ - void convert_to(SparsityCsr *result) const override; + void convert_to(SparsityCsr* result) const override; - void move_to(SparsityCsr *result) override; + void move_to(SparsityCsr* result) override; /** * Reads a @ref matrix_data into Fbcsr format. @@ -200,9 +200,9 @@ class Fbcsr : public EnableLinOp>, * * @warning Unlike Csr::read, here explicit non-zeros are NOT dropped. */ - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr transpose() const override; @@ -231,7 +231,7 @@ class Fbcsr : public EnableLinOp>, /** * @return The values of the matrix. */ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc Fbcsr::get_values() @@ -240,7 +240,7 @@ class Fbcsr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -248,7 +248,7 @@ class Fbcsr : public EnableLinOp>, /** * @return The column indexes of the matrix. 
*/ - index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } /** * @copydoc Fbcsr::get_col_idxs() @@ -257,7 +257,7 @@ class Fbcsr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_col_idxs() const noexcept + const index_type* get_const_col_idxs() const noexcept { return col_idxs_.get_const_data(); } @@ -265,7 +265,7 @@ class Fbcsr : public EnableLinOp>, /** * @return The row pointers of the matrix. */ - index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } + index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); } /** * @copydoc Fbcsr::get_row_ptrs() @@ -274,7 +274,7 @@ class Fbcsr : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_row_ptrs() const noexcept + const index_type* get_const_row_ptrs() const noexcept { return row_ptrs_.get_const_data(); } @@ -340,7 +340,7 @@ class Fbcsr : public EnableLinOp>, * @param num_nonzeros number of nonzeros * @param block_size Size of the small dense square blocks */ - Fbcsr(std::shared_ptr exec, const dim<2> &size, + Fbcsr(std::shared_ptr exec, const dim<2>& size, size_type num_nonzeros, int block_size) : EnableLinOp(exec, size), bs_{block_size}, @@ -375,9 +375,9 @@ class Fbcsr : public EnableLinOp>, */ template - Fbcsr(std::shared_ptr exec, const dim<2> &size, - int block_size, ValuesArray &&values, ColIdxsArray &&col_idxs, - RowPtrsArray &&row_ptrs) + Fbcsr(std::shared_ptr exec, const dim<2>& size, + int block_size, ValuesArray&& values, ColIdxsArray&& col_idxs, + RowPtrsArray&& row_ptrs) : EnableLinOp(exec, size), bs_{block_size}, nbcols_{static_cast( @@ -391,10 +391,10 @@ class Fbcsr : public EnableLinOp>, GKO_ASSERT_EQ(this->get_size()[0] / bs_ + 1, row_ptrs_.get_num_elems()); } - void apply_impl(const 
LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; private: int bs_; ///< Block size diff --git a/include/ginkgo/core/matrix/hybrid.hpp b/include/ginkgo/core/matrix/hybrid.hpp index 7511000fade..08b15ef8645 100644 --- a/include/ginkgo/core/matrix/hybrid.hpp +++ b/include/ginkgo/core/matrix/hybrid.hpp @@ -130,9 +130,9 @@ class Hybrid * part * @param coo_nnz the output number of nonzeros of the coo part */ - void compute_hybrid_config(const Array &row_nnz, - size_type *ell_num_stored_elements_per_row, - size_type *coo_nnz) + void compute_hybrid_config(const Array& row_nnz, + size_type* ell_num_stored_elements_per_row, + size_type* coo_nnz) { Array ref_row_nnz(row_nnz.get_executor()->get_master(), row_nnz.get_num_elems()); @@ -169,7 +169,7 @@ class Hybrid * @return the number of stored elements per row of the ell part */ virtual size_type compute_ell_num_stored_elements_per_row( - Array *row_nnz) const = 0; + Array* row_nnz) const = 0; protected: /** @@ -180,7 +180,7 @@ class Hybrid * * @return the number of nonzeros of the coo part */ - size_type compute_coo_nnz(const Array &row_nnz) const + size_type compute_coo_nnz(const Array& row_nnz) const { size_type coo_nnz = 0; auto row_nnz_val = row_nnz.get_const_data(); @@ -214,7 +214,7 @@ class Hybrid {} size_type compute_ell_num_stored_elements_per_row( - Array *row_nnz) const override + Array* row_nnz) const override { return num_columns_; } @@ -252,7 +252,7 @@ class Hybrid } size_type compute_ell_num_stored_elements_per_row( - Array *row_nnz) const override + Array* row_nnz) const override { auto row_nnz_val = row_nnz->get_data(); auto num_rows = row_nnz->get_num_elems(); @@ -294,7 +294,7 @@ class Hybrid {} size_type compute_ell_num_stored_elements_per_row( - Array *row_nnz) 
const override + Array* row_nnz) const override { auto num_rows = row_nnz->get_num_elems(); auto ell_cols = @@ -340,7 +340,7 @@ class Hybrid {} size_type compute_ell_num_stored_elements_per_row( - Array *row_nnz) const override + Array* row_nnz) const override { return strategy_.compute_ell_num_stored_elements_per_row(row_nnz); } @@ -369,7 +369,7 @@ class Hybrid automatic() : strategy_(imbalance_bounded_limit(1.0 / 3.0, 0.001)) {} size_type compute_ell_num_stored_elements_per_row( - Array *row_nnz) const override + Array* row_nnz) const override { return strategy_.compute_ell_num_stored_elements_per_row(row_nnz); } @@ -381,21 +381,21 @@ class Hybrid friend class Hybrid, IndexType>; void convert_to( - Hybrid, IndexType> *result) const override; + Hybrid, IndexType>* result) const override; - void move_to(Hybrid, IndexType> *result) override; + void move_to(Hybrid, IndexType>* result) override; - void convert_to(Dense *other) const override; + void convert_to(Dense* other) const override; - void move_to(Dense *other) override; + void move_to(Dense* other) override; - void convert_to(Csr *other) const override; + void convert_to(Csr* other) const override; - void move_to(Csr *other) override; + void move_to(Csr* other) override; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr> extract_diagonal() const override; @@ -408,7 +408,7 @@ class Hybrid * * @return the values of the ell part */ - value_type *get_ell_values() noexcept { return ell_->get_values(); } + value_type* get_ell_values() noexcept { return ell_->get_values(); } /** * @copydoc Hybrid::get_ell_values() @@ -417,7 +417,7 @@ class Hybrid * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type *get_const_ell_values() const noexcept + const value_type* get_const_ell_values() const noexcept { return ell_->get_const_values(); } @@ -427,7 +427,7 @@ class Hybrid * * @return the column indexes of the ell part */ - index_type *get_ell_col_idxs() noexcept { return ell_->get_col_idxs(); } + index_type* get_ell_col_idxs() noexcept { return ell_->get_col_idxs(); } /** * @copydoc Hybrid::get_ell_col_idxs() @@ -436,7 +436,7 @@ class Hybrid * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_ell_col_idxs() const noexcept + const index_type* get_const_ell_col_idxs() const noexcept { return ell_->get_const_col_idxs(); } @@ -479,7 +479,7 @@ class Hybrid * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - value_type &ell_val_at(size_type row, size_type idx) noexcept + value_type& ell_val_at(size_type row, size_type idx) noexcept { return ell_->val_at(row, idx); } @@ -502,7 +502,7 @@ class Hybrid * stored at (e.g. trying to call this method on a GPU matrix from * the OMP results in a runtime error) */ - index_type &ell_col_at(size_type row, size_type idx) noexcept + index_type& ell_col_at(size_type row, size_type idx) noexcept { return ell_->col_at(row, idx); } @@ -520,14 +520,14 @@ class Hybrid * * @return the matrix of the ell part */ - const ell_type *get_ell() const noexcept { return ell_.get(); } + const ell_type* get_ell() const noexcept { return ell_.get(); } /** * Returns the values of the coo part. * * @return the values of the coo part. */ - value_type *get_coo_values() noexcept { return coo_->get_values(); } + value_type* get_coo_values() noexcept { return coo_->get_values(); } /** * @copydoc Hybrid::get_coo_values() @@ -536,7 +536,7 @@ class Hybrid * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type *get_const_coo_values() const noexcept + const value_type* get_const_coo_values() const noexcept { return coo_->get_const_values(); } @@ -546,7 +546,7 @@ class Hybrid * * @return the column indexes of the coo part. */ - index_type *get_coo_col_idxs() noexcept { return coo_->get_col_idxs(); } + index_type* get_coo_col_idxs() noexcept { return coo_->get_col_idxs(); } /** * @copydoc Hybrid::get_coo_col_idxs() @@ -555,7 +555,7 @@ class Hybrid * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_coo_col_idxs() const noexcept + const index_type* get_const_coo_col_idxs() const noexcept { return coo_->get_const_col_idxs(); } @@ -565,7 +565,7 @@ class Hybrid * * @return the row indexes of the coo part. */ - index_type *get_coo_row_idxs() noexcept { return coo_->get_row_idxs(); } + index_type* get_coo_row_idxs() noexcept { return coo_->get_row_idxs(); } /** * @copydoc Hybrid::get_coo_row_idxs() @@ -574,7 +574,7 @@ class Hybrid * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_coo_row_idxs() const noexcept + const index_type* get_const_coo_row_idxs() const noexcept { return coo_->get_const_row_idxs(); } @@ -594,7 +594,7 @@ class Hybrid * * @return the matrix of the coo part */ - const coo_type *get_coo() const noexcept { return coo_.get(); } + const coo_type* get_coo() const noexcept { return coo_.get(); } /** * Returns the number of elements explicitly stored in the matrix. 
@@ -634,7 +634,7 @@ class Hybrid * * @return this */ - Hybrid &operator=(const Hybrid &other) + Hybrid& operator=(const Hybrid& other) { if (&other == this) { return *this; @@ -670,7 +670,7 @@ class Hybrid * @param strategy strategy of deciding the Hybrid config */ Hybrid( - std::shared_ptr exec, const dim<2> &size, + std::shared_ptr exec, const dim<2>& size, std::shared_ptr strategy = std::make_shared()) : Hybrid(std::move(exec), size, size[1], std::move(strategy)) {} @@ -686,7 +686,7 @@ class Hybrid * @param strategy strategy of deciding the Hybrid config */ Hybrid( - std::shared_ptr exec, const dim<2> &size, + std::shared_ptr exec, const dim<2>& size, size_type num_stored_elements_per_row, std::shared_ptr strategy = std::make_shared()) : Hybrid(std::move(exec), size, num_stored_elements_per_row, size[0], @@ -703,7 +703,7 @@ class Hybrid * @param stride stride of the rows * @param strategy strategy of deciding the Hybrid config */ - Hybrid(std::shared_ptr exec, const dim<2> &size, + Hybrid(std::shared_ptr exec, const dim<2>& size, size_type num_stored_elements_per_row, size_type stride, std::shared_ptr strategy) : Hybrid(std::move(exec), size, num_stored_elements_per_row, stride, {}, @@ -722,7 +722,7 @@ class Hybrid * @param strategy strategy of deciding the Hybrid config */ Hybrid( - std::shared_ptr exec, const dim<2> &size, + std::shared_ptr exec, const dim<2>& size, size_type num_stored_elements_per_row, size_type stride, size_type num_nonzeros = {}, std::shared_ptr strategy = std::make_shared()) @@ -733,10 +733,10 @@ class Hybrid strategy_(std::move(strategy)) {} - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; private: std::shared_ptr ell_; diff --git 
a/include/ginkgo/core/matrix/identity.hpp b/include/ginkgo/core/matrix/identity.hpp index 5c5315eedce..473f36faf5d 100644 --- a/include/ginkgo/core/matrix/identity.hpp +++ b/include/ginkgo/core/matrix/identity.hpp @@ -108,10 +108,10 @@ class Identity : public EnableLinOp>, : EnableLinOp(exec, dim<2>{size}) {} - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; }; diff --git a/include/ginkgo/core/matrix/permutation.hpp b/include/ginkgo/core/matrix/permutation.hpp index fe0b5c5b3f7..4cf252ba2b9 100644 --- a/include/ginkgo/core/matrix/permutation.hpp +++ b/include/ginkgo/core/matrix/permutation.hpp @@ -89,7 +89,7 @@ class Permutation : public EnableLinOp>, * * @return the pointer to the row permutation array. */ - index_type *get_permutation() noexcept { return permutation_.get_data(); } + index_type* get_permutation() noexcept { return permutation_.get_data(); } /** * @copydoc get_permutation() @@ -98,7 +98,7 @@ class Permutation : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_permutation() const noexcept + const index_type* get_const_permutation() const noexcept { return permutation_.get_const_data(); } @@ -150,8 +150,8 @@ class Permutation : public EnableLinOp>, * @param size size of the permutable matrix * @param enabled_permute mask for the type of permutation to apply. 
*/ - Permutation(std::shared_ptr exec, const dim<2> &size, - const mask_type &enabled_permute = row_permute) + Permutation(std::shared_ptr exec, const dim<2>& size, + const mask_type& enabled_permute = row_permute) : EnableLinOp(exec, size), permutation_(exec, size[0]), row_size_(size[0]), @@ -175,9 +175,9 @@ class Permutation : public EnableLinOp>, * and the original array data will not be used in the matrix. */ template - Permutation(std::shared_ptr exec, const dim<2> &size, - IndicesArray &&permutation_indices, - const mask_type &enabled_permute = row_permute) + Permutation(std::shared_ptr exec, const dim<2>& size, + IndicesArray&& permutation_indices, + const mask_type& enabled_permute = row_permute) : EnableLinOp(exec, size), permutation_{exec, std::forward(permutation_indices)}, row_size_(size[0]), @@ -192,7 +192,7 @@ class Permutation : public EnableLinOp>, } } - void apply_impl(const LinOp *in, LinOp *out) const + void apply_impl(const LinOp* in, LinOp* out) const { auto perm = as>(in); std::unique_ptr tmp{}; @@ -225,8 +225,8 @@ class Permutation : public EnableLinOp>, } - void apply_impl(const LinOp *, const LinOp *in, const LinOp *, - LinOp *out) const + void apply_impl(const LinOp*, const LinOp* in, const LinOp*, + LinOp* out) const { // Ignores alpha and beta and just performs a normal permutation as an // advanced apply does not really make sense here. 
diff --git a/include/ginkgo/core/matrix/sellp.hpp b/include/ginkgo/core/matrix/sellp.hpp index d2dff8199bb..7fa40425034 100644 --- a/include/ginkgo/core/matrix/sellp.hpp +++ b/include/ginkgo/core/matrix/sellp.hpp @@ -94,21 +94,21 @@ class Sellp : public EnableLinOp>, friend class Sellp, IndexType>; void convert_to( - Sellp, IndexType> *result) const override; + Sellp, IndexType>* result) const override; - void move_to(Sellp, IndexType> *result) override; + void move_to(Sellp, IndexType>* result) override; - void convert_to(Dense *other) const override; + void convert_to(Dense* other) const override; - void move_to(Dense *other) override; + void move_to(Dense* other) override; - void convert_to(Csr *other) const override; + void convert_to(Csr* other) const override; - void move_to(Csr *other) override; + void move_to(Csr* other) override; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr> extract_diagonal() const override; @@ -121,7 +121,7 @@ class Sellp : public EnableLinOp>, * * @return the values of the matrix. */ - value_type *get_values() noexcept { return values_.get_data(); } + value_type* get_values() noexcept { return values_.get_data(); } /** * @copydoc Sellp::get_values() @@ -130,7 +130,7 @@ class Sellp : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_values() const noexcept + const value_type* get_const_values() const noexcept { return values_.get_const_data(); } @@ -140,7 +140,7 @@ class Sellp : public EnableLinOp>, * * @return the column indexes of the matrix. 
*/ - index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } /** * @copydoc Sellp::get_col_idxs() @@ -149,7 +149,7 @@ class Sellp : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_col_idxs() const noexcept + const index_type* get_const_col_idxs() const noexcept { return col_idxs_.get_const_data(); } @@ -159,7 +159,7 @@ class Sellp : public EnableLinOp>, * * @return the lengths(columns) of slices. */ - size_type *get_slice_lengths() noexcept + size_type* get_slice_lengths() noexcept { return slice_lengths_.get_data(); } @@ -171,7 +171,7 @@ class Sellp : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const size_type *get_const_slice_lengths() const noexcept + const size_type* get_const_slice_lengths() const noexcept { return slice_lengths_.get_const_data(); } @@ -181,7 +181,7 @@ class Sellp : public EnableLinOp>, * * @return the offsets of slices. */ - size_type *get_slice_sets() noexcept { return slice_sets_.get_data(); } + size_type* get_slice_sets() noexcept { return slice_sets_.get_data(); } /** * @copydoc Sellp::get_slice_sets() @@ -190,7 +190,7 @@ class Sellp : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const size_type *get_const_slice_sets() const noexcept + const size_type* get_const_slice_sets() const noexcept { return slice_sets_.get_const_data(); } @@ -238,7 +238,7 @@ class Sellp : public EnableLinOp>, * stored at (e.g. 
trying to call this method on a GPU matrix from * the CPU results in a runtime error) */ - value_type &val_at(size_type row, size_type slice_set, + value_type& val_at(size_type row, size_type slice_set, size_type idx) noexcept { return values_.get_data()[this->linearize_index(row, slice_set, idx)]; @@ -266,7 +266,7 @@ class Sellp : public EnableLinOp>, * stored at (e.g. trying to call this method on a GPU matrix from * the CPU results in a runtime error) */ - index_type &col_at(size_type row, size_type slice_set, + index_type& col_at(size_type row, size_type slice_set, size_type idx) noexcept { return this->get_col_idxs()[this->linearize_index(row, slice_set, idx)]; @@ -291,7 +291,7 @@ class Sellp : public EnableLinOp>, * @param exec Executor associated to the matrix * @param size size of the matrix */ - Sellp(std::shared_ptr exec, const dim<2> &size = dim<2>{}) + Sellp(std::shared_ptr exec, const dim<2>& size = dim<2>{}) : Sellp(std::move(exec), size, ceildiv(size[0], default_slice_size) * size[1]) {} @@ -304,7 +304,7 @@ class Sellp : public EnableLinOp>, * @param size size of the matrix * @param total_cols number of the sum of all cols in every slice. */ - Sellp(std::shared_ptr exec, const dim<2> &size, + Sellp(std::shared_ptr exec, const dim<2>& size, size_type total_cols) : Sellp(std::move(exec), size, default_slice_size, default_stride_factor, total_cols) @@ -320,7 +320,7 @@ class Sellp : public EnableLinOp>, * should be multiples of the stride_factor) * @param total_cols number of the sum of all cols in every slice. 
*/ - Sellp(std::shared_ptr exec, const dim<2> &size, + Sellp(std::shared_ptr exec, const dim<2>& size, size_type slice_size, size_type stride_factor, size_type total_cols) : EnableLinOp(exec, size), values_(exec, slice_size * total_cols), @@ -332,10 +332,10 @@ class Sellp : public EnableLinOp>, total_cols_(total_cols) {} - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; size_type linearize_index(size_type row, size_type slice_set, size_type col) const noexcept diff --git a/include/ginkgo/core/matrix/sparsity_csr.hpp b/include/ginkgo/core/matrix/sparsity_csr.hpp index bce6621dd7d..3ef651bf22a 100644 --- a/include/ginkgo/core/matrix/sparsity_csr.hpp +++ b/include/ginkgo/core/matrix/sparsity_csr.hpp @@ -93,9 +93,9 @@ class SparsityCsr using transposed_type = SparsityCsr; using mat_data = matrix_data; - void read(const mat_data &data) override; + void read(const mat_data& data) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr transpose() const override; @@ -129,7 +129,7 @@ class SparsityCsr * * @return the column indices of the matrix. */ - index_type *get_col_idxs() noexcept { return col_idxs_.get_data(); } + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } /** * @copydoc SparsityCsr::get_col_idxs() @@ -138,7 +138,7 @@ class SparsityCsr * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_col_idxs() const noexcept + const index_type* get_const_col_idxs() const noexcept { return col_idxs_.get_const_data(); } @@ -148,7 +148,7 @@ class SparsityCsr * * @return the row pointers of the matrix. 
*/ - index_type *get_row_ptrs() noexcept { return row_ptrs_.get_data(); } + index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); } /** * @copydoc SparsityCsr::get_row_ptrs() @@ -157,7 +157,7 @@ class SparsityCsr * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type *get_const_row_ptrs() const noexcept + const index_type* get_const_row_ptrs() const noexcept { return row_ptrs_.get_const_data(); } @@ -167,7 +167,7 @@ class SparsityCsr * * @return the value of the matrix. */ - value_type *get_value() noexcept { return value_.get_data(); } + value_type* get_value() noexcept { return value_.get_data(); } /** * @copydoc SparsityCsr::get_value() @@ -176,7 +176,7 @@ class SparsityCsr * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type *get_const_value() const noexcept + const value_type* get_const_value() const noexcept { return value_.get_const_data(); } @@ -201,7 +201,7 @@ class SparsityCsr * @param num_nonzeros number of nonzeros */ SparsityCsr(std::shared_ptr exec, - const dim<2> &size = dim<2>{}, size_type num_nonzeros = {}) + const dim<2>& size = dim<2>{}, size_type num_nonzeros = {}) : EnableLinOp(exec, size), col_idxs_(exec, num_nonzeros), row_ptrs_(exec, size[0] + 1), @@ -228,8 +228,8 @@ class SparsityCsr * matrix. 
*/ template - SparsityCsr(std::shared_ptr exec, const dim<2> &size, - ColIdxsArray &&col_idxs, RowPtrsArray &&row_ptrs, + SparsityCsr(std::shared_ptr exec, const dim<2>& size, + ColIdxsArray&& col_idxs, RowPtrsArray&& row_ptrs, value_type value = one()) : EnableLinOp(exec, size), col_idxs_{exec, std::forward(col_idxs)}, @@ -254,10 +254,10 @@ class SparsityCsr this->copy_from(std::move(tmp_.get())); } - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; private: Array col_idxs_; diff --git a/include/ginkgo/core/multigrid/amgx_pgm.hpp b/include/ginkgo/core/multigrid/amgx_pgm.hpp index 4afc049aa60..b5e31e8db8e 100644 --- a/include/ginkgo/core/multigrid/amgx_pgm.hpp +++ b/include/ginkgo/core/multigrid/amgx_pgm.hpp @@ -100,7 +100,7 @@ class AmgxPgm : public EnableLinOp>, * * @return the aggregate group. */ - IndexType *get_agg() noexcept { return agg_.get_data(); } + IndexType* get_agg() noexcept { return agg_.get_data(); } /** * @copydoc AmgxPgm::get_agg() @@ -109,7 +109,7 @@ class AmgxPgm : public EnableLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const IndexType *get_const_agg() const noexcept + const IndexType* get_const_agg() const noexcept { return agg_.get_const_data(); } @@ -144,13 +144,13 @@ class AmgxPgm : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override + void apply_impl(const LinOp* b, LinOp* x) const override { this->get_composition()->apply(b, x); } - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override { this->get_composition()->apply(alpha, b, beta, x); } @@ -159,7 +159,7 @@ class AmgxPgm : public EnableLinOp>, : EnableLinOp(std::move(exec)) {} - explicit AmgxPgm(const Factory *factory, + explicit AmgxPgm(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), system_matrix->get_size()), diff --git a/include/ginkgo/core/preconditioner/ic.hpp b/include/ginkgo/core/preconditioner/ic.hpp index dcc35a9b317..7f28b7e0201 100644 --- a/include/ginkgo/core/preconditioner/ic.hpp +++ b/include/ginkgo/core/preconditioner/ic.hpp @@ -189,7 +189,7 @@ class Ic : public EnableLinOp>, public Transposable { } protected: - void apply_impl(const LinOp *b, LinOp *x) const override + void apply_impl(const LinOp* b, LinOp* x) const override { // take care of real-to-complex apply precision_dispatch_real_complex( @@ -204,8 +204,8 @@ class Ic : public EnableLinOp>, public Transposable { b, x); } - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override { precision_dispatch_real_complex( [&](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -221,7 +221,7 @@ class Ic : public EnableLinOp>, public Transposable { : EnableLinOp(std::move(exec)) {} - explicit Ic(const Factory *factory, std::shared_ptr lin_op) 
+ explicit Ic(const Factory* factory, std::shared_ptr lin_op) : EnableLinOp(factory->get_executor(), lin_op->get_size()), parameters_{factory->get_parameters()} { @@ -281,7 +281,7 @@ class Ic : public EnableLinOp>, public Transposable { * @param b Right hand side of the first solve. Also acts as the * initial guess, meaning the intermediate value will be a copy of b */ - void set_cache_to(const LinOp *b) const + void set_cache_to(const LinOp* b) const { if (cache_.intermediate == nullptr) { cache_.intermediate = @@ -302,8 +302,8 @@ class Ic : public EnableLinOp>, public Transposable { template static std::enable_if_t::value, std::unique_ptr> - generate_default_solver(const std::shared_ptr &exec, - const std::shared_ptr &mtx) + generate_default_solver(const std::shared_ptr& exec, + const std::shared_ptr& mtx) { constexpr gko::remove_complex default_reduce_residual{1e-4}; const unsigned int default_max_iters{ @@ -326,8 +326,8 @@ class Ic : public EnableLinOp>, public Transposable { template static std::enable_if_t::value, std::unique_ptr> - generate_default_solver(const std::shared_ptr &exec, - const std::shared_ptr &mtx) + generate_default_solver(const std::shared_ptr& exec, + const std::shared_ptr& mtx) { return SolverType::build().on(exec)->generate(mtx); } @@ -348,10 +348,10 @@ class Ic : public EnableLinOp>, public Transposable { mutable struct cache_struct { cache_struct() = default; ~cache_struct() = default; - cache_struct(const cache_struct &) {} - cache_struct(cache_struct &&) {} - cache_struct &operator=(const cache_struct &) { return *this; } - cache_struct &operator=(cache_struct &&) { return *this; } + cache_struct(const cache_struct&) {} + cache_struct(cache_struct&&) {} + cache_struct& operator=(const cache_struct&) { return *this; } + cache_struct& operator=(cache_struct&&) { return *this; } std::unique_ptr intermediate{}; } cache_; }; diff --git a/include/ginkgo/core/preconditioner/ilu.hpp b/include/ginkgo/core/preconditioner/ilu.hpp index 
9eae5447af3..7c7aa4e0d9a 100644 --- a/include/ginkgo/core/preconditioner/ilu.hpp +++ b/include/ginkgo/core/preconditioner/ilu.hpp @@ -207,7 +207,7 @@ class Ilu : public EnableLinOp< } protected: - void apply_impl(const LinOp *b, LinOp *x) const override + void apply_impl(const LinOp* b, LinOp* x) const override { // take care of real-to-complex apply precision_dispatch_real_complex( @@ -230,8 +230,8 @@ class Ilu : public EnableLinOp< b, x); } - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override { precision_dispatch_real_complex( [&](auto dense_alpha, auto dense_b, auto dense_beta, auto dense_x) { @@ -253,7 +253,7 @@ class Ilu : public EnableLinOp< : EnableLinOp(std::move(exec)) {} - explicit Ilu(const Factory *factory, std::shared_ptr lin_op) + explicit Ilu(const Factory* factory, std::shared_ptr lin_op) : EnableLinOp(factory->get_executor(), lin_op->get_size()), parameters_{factory->get_parameters()} { @@ -309,7 +309,7 @@ class Ilu : public EnableLinOp< * @param b Right hand side of the first solve. 
Also acts as the initial * guess, meaning the intermediate value will be a copy of b */ - void set_cache_to(const LinOp *b) const + void set_cache_to(const LinOp* b) const { if (cache_.intermediate == nullptr) { cache_.intermediate = @@ -330,8 +330,8 @@ class Ilu : public EnableLinOp< template static std::enable_if_t::value, std::unique_ptr> - generate_default_solver(const std::shared_ptr &exec, - const std::shared_ptr &mtx) + generate_default_solver(const std::shared_ptr& exec, + const std::shared_ptr& mtx) { constexpr gko::remove_complex default_reduce_residual{1e-4}; const unsigned int default_max_iters{ @@ -354,8 +354,8 @@ class Ilu : public EnableLinOp< template static std::enable_if_t::value, std::unique_ptr> - generate_default_solver(const std::shared_ptr &exec, - const std::shared_ptr &mtx) + generate_default_solver(const std::shared_ptr& exec, + const std::shared_ptr& mtx) { return SolverType::build().on(exec)->generate(mtx); } @@ -376,10 +376,10 @@ class Ilu : public EnableLinOp< mutable struct cache_struct { cache_struct() = default; ~cache_struct() = default; - cache_struct(const cache_struct &) {} - cache_struct(cache_struct &&) {} - cache_struct &operator=(const cache_struct &) { return *this; } - cache_struct &operator=(cache_struct &&) { return *this; } + cache_struct(const cache_struct&) {} + cache_struct(cache_struct&&) {} + cache_struct& operator=(const cache_struct&) { return *this; } + cache_struct& operator=(cache_struct&&) { return *this; } std::unique_ptr intermediate{}; } cache_; }; diff --git a/include/ginkgo/core/preconditioner/isai.hpp b/include/ginkgo/core/preconditioner/isai.hpp index d3ff87d7b10..bc627b7ae9e 100644 --- a/include/ginkgo/core/preconditioner/isai.hpp +++ b/include/ginkgo/core/preconditioner/isai.hpp @@ -206,7 +206,7 @@ class Isai : public EnableLinOp>, * @param factory the factory to use to create the preconditoner * @param system_matrix the matrix for which an ISAI is to be computed */ - explicit Isai(const Factory 
*factory, + explicit Isai(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), system_matrix->get_size()), parameters_{factory->get_parameters()} @@ -223,13 +223,13 @@ class Isai : public EnableLinOp>, } } - void apply_impl(const LinOp *b, LinOp *x) const override + void apply_impl(const LinOp* b, LinOp* x) const override { approximate_inverse_->apply(b, x); } - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override { approximate_inverse_->apply(alpha, b, beta, x); } diff --git a/include/ginkgo/core/preconditioner/jacobi.hpp b/include/ginkgo/core/preconditioner/jacobi.hpp index 76e571baf10..970721820e6 100644 --- a/include/ginkgo/core/preconditioner/jacobi.hpp +++ b/include/ginkgo/core/preconditioner/jacobi.hpp @@ -243,7 +243,7 @@ class Jacobi : public EnableLinOp>, * @internal * TODO: replace with ranges */ - const block_interleaved_storage_scheme &get_storage_scheme() + const block_interleaved_storage_scheme& get_storage_scheme() const noexcept { return storage_scheme_; @@ -260,7 +260,7 @@ class Jacobi : public EnableLinOp>, * @internal * TODO: replace with ranges */ - const value_type *get_blocks() const noexcept + const value_type* get_blocks() const noexcept { return blocks_.get_const_data(); } @@ -274,7 +274,7 @@ class Jacobi : public EnableLinOp>, * implementations of the standard non-adaptive variant are allowed to * omit the calculation of condition numbers. 
*/ - const remove_complex *get_conditioning() const noexcept + const remove_complex* get_conditioning() const noexcept { return conditioning_.get_const_data(); } @@ -289,11 +289,11 @@ class Jacobi : public EnableLinOp>, return blocks_.get_num_elems(); } - void convert_to(matrix::Dense *result) const override; + void convert_to(matrix::Dense* result) const override; - void move_to(matrix::Dense *result) override; + void move_to(matrix::Dense* result) override; - void write(mat_data &data) const override; + void write(mat_data& data) const override; std::unique_ptr transpose() const override; @@ -378,13 +378,13 @@ class Jacobi : public EnableLinOp>, {} storage_optimization_type( - const Array &block_wise_opt) + const Array& block_wise_opt) : is_block_wise{block_wise_opt.get_num_elems() > 0}, block_wise{block_wise_opt} {} storage_optimization_type( - Array &&block_wise_opt) + Array&& block_wise_opt) : is_block_wise{block_wise_opt.get_num_elems() > 0}, block_wise{std::move(block_wise_opt)} {} @@ -522,7 +522,7 @@ class Jacobi : public EnableLinOp>, * @param system_matrix the matrix this preconditioner should be created * from */ - explicit Jacobi(const Factory *factory, + explicit Jacobi(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), @@ -592,7 +592,7 @@ class Jacobi : public EnableLinOp>, * skipped (therefore, marking that it is already * sorted) */ - void generate(const LinOp *system_matrix, bool skip_sorting); + void generate(const LinOp* system_matrix, bool skip_sorting); /** * Detects the diagonal blocks and allocates the memory needed to store the @@ -601,12 +601,12 @@ class Jacobi : public EnableLinOp>, * @param system_matrix the source matrix whose diagonal block pattern is * to be detected */ - void detect_blocks(const matrix::Csr *system_matrix); + void detect_blocks(const matrix::Csr* system_matrix); - void apply_impl(const LinOp *b, LinOp *x) const override; + void 
apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; private: block_interleaved_storage_scheme storage_scheme_{}; diff --git a/include/ginkgo/core/reorder/rcm.hpp b/include/ginkgo/core/reorder/rcm.hpp index 922df382f66..de335edc00e 100644 --- a/include/ginkgo/core/reorder/rcm.hpp +++ b/include/ginkgo/core/reorder/rcm.hpp @@ -148,14 +148,14 @@ class Rcm * Generates the permutation matrix and if required the inverse permutation * matrix. */ - void generate(std::shared_ptr &exec, + void generate(std::shared_ptr& exec, std::unique_ptr adjacency_matrix) const; explicit Rcm(std::shared_ptr exec) : EnablePolymorphicObject(std::move(exec)) {} - explicit Rcm(const Factory *factory, const ReorderingBaseArgs &args) + explicit Rcm(const Factory* factory, const ReorderingBaseArgs& args) : EnablePolymorphicObject(factory->get_executor()), parameters_{factory->get_parameters()} { diff --git a/include/ginkgo/core/reorder/reordering_base.hpp b/include/ginkgo/core/reorder/reordering_base.hpp index ac0efc8e446..a7111b69cf3 100644 --- a/include/ginkgo/core/reorder/reordering_base.hpp +++ b/include/ginkgo/core/reorder/reordering_base.hpp @@ -132,7 +132,7 @@ using EnableDefaultReorderingBaseFactory = #define GKO_ENABLE_REORDERING_BASE_FACTORY(_reordering_base, _parameters_name, \ _factory_name) \ public: \ - const _parameters_name##_type &get_##_parameters_name() const \ + const _parameters_name##_type& get_##_parameters_name() const \ { \ return _parameters_name##_; \ } \ @@ -150,7 +150,7 @@ public: \ std::move(exec)) \ {} \ explicit _factory_name(std::shared_ptr exec, \ - const _parameters_name##_type ¶meters) \ + const _parameters_name##_type& parameters) \ : ::gko::reorder::EnableDefaultReorderingBaseFactory< \ _factory_name, _reordering_base, _parameters_name##_type>( \ 
std::move(exec), parameters) \ diff --git a/include/ginkgo/core/solver/bicg.hpp b/include/ginkgo/core/solver/bicg.hpp index da57e30e732..176f33d9602 100644 --- a/include/ginkgo/core/solver/bicg.hpp +++ b/include/ginkgo/core/solver/bicg.hpp @@ -154,19 +154,19 @@ class Bicg : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit Bicg(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Bicg(const Factory *factory, + explicit Bicg(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/bicgstab.hpp b/include/ginkgo/core/solver/bicgstab.hpp index d842a4fbd31..53a8e161d20 100644 --- a/include/ginkgo/core/solver/bicgstab.hpp +++ b/include/ginkgo/core/solver/bicgstab.hpp @@ -152,19 +152,19 @@ class Bicgstab : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit Bicgstab(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Bicgstab(const Factory *factory, + 
explicit Bicgstab(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/cb_gmres.hpp b/include/ginkgo/core/solver/cb_gmres.hpp index 4ee9fe18711..9175570f058 100644 --- a/include/ginkgo/core/solver/cb_gmres.hpp +++ b/include/ginkgo/core/solver/cb_gmres.hpp @@ -197,19 +197,19 @@ class CbGmres : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit CbGmres(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit CbGmres(const Factory *factory, + explicit CbGmres(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/cg.hpp b/include/ginkgo/core/solver/cg.hpp index d84df60906d..e66f743d9ac 100644 --- a/include/ginkgo/core/solver/cg.hpp +++ b/include/ginkgo/core/solver/cg.hpp @@ -148,19 +148,19 @@ class Cg : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; 
explicit Cg(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Cg(const Factory *factory, + explicit Cg(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/cgs.hpp b/include/ginkgo/core/solver/cgs.hpp index 14282bb6c8f..329fa6bf7db 100644 --- a/include/ginkgo/core/solver/cgs.hpp +++ b/include/ginkgo/core/solver/cgs.hpp @@ -145,19 +145,19 @@ class Cgs : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit Cgs(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Cgs(const Factory *factory, + explicit Cgs(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/fcg.hpp b/include/ginkgo/core/solver/fcg.hpp index 55ded85f667..894ab38b463 100644 --- a/include/ginkgo/core/solver/fcg.hpp +++ b/include/ginkgo/core/solver/fcg.hpp @@ -153,19 +153,19 @@ class Fcg : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const 
LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit Fcg(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Fcg(const Factory *factory, + explicit Fcg(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/gmres.hpp b/include/ginkgo/core/solver/gmres.hpp index a24e42392d3..ab3bcde9c5d 100644 --- a/include/ginkgo/core/solver/gmres.hpp +++ b/include/ginkgo/core/solver/gmres.hpp @@ -167,19 +167,19 @@ class Gmres : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit Gmres(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Gmres(const Factory *factory, + explicit Gmres(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/idr.hpp b/include/ginkgo/core/solver/idr.hpp index 45551eea415..0fdcbadb933 100644 --- a/include/ginkgo/core/solver/idr.hpp +++ b/include/ginkgo/core/solver/idr.hpp @@ -245,20 +245,20 @@ class Idr : public EnableLinOp>, GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* 
x) const override; template - void iterate(const matrix::Dense *dense_b, - matrix::Dense *dense_x) const; + void iterate(const matrix::Dense* dense_b, + matrix::Dense* dense_x) const; explicit Idr(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Idr(const Factory *factory, + explicit Idr(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/ir.hpp b/include/ginkgo/core/solver/ir.hpp index 16ca3e1d3fa..0e5421ed906 100644 --- a/include/ginkgo/core/solver/ir.hpp +++ b/include/ginkgo/core/solver/ir.hpp @@ -203,19 +203,19 @@ class Ir : public EnableLinOp>, public Transposable { GKO_ENABLE_BUILD_METHOD(Factory); protected: - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_dense_impl(const matrix::Dense *b, - matrix::Dense *x) const; + void apply_dense_impl(const matrix::Dense* b, + matrix::Dense* x) const; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; explicit Ir(std::shared_ptr exec) : EnableLinOp(std::move(exec)) {} - explicit Ir(const Factory *factory, + explicit Ir(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/lower_trs.hpp b/include/ginkgo/core/solver/lower_trs.hpp index c3b95702fd0..ab375c05406 100644 --- a/include/ginkgo/core/solver/lower_trs.hpp +++ b/include/ginkgo/core/solver/lower_trs.hpp @@ -125,10 +125,10 @@ class LowerTrs : public EnableLinOp>, protected: void init_trs_solve_struct(); - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp 
*b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; /** * Generates the analysis structure from the system matrix and the right @@ -140,7 +140,7 @@ class LowerTrs : public EnableLinOp>, : EnableLinOp(std::move(exec)) {} - explicit LowerTrs(const Factory *factory, + explicit LowerTrs(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/solver/upper_trs.hpp b/include/ginkgo/core/solver/upper_trs.hpp index dd82b5df6e4..66f388d4823 100644 --- a/include/ginkgo/core/solver/upper_trs.hpp +++ b/include/ginkgo/core/solver/upper_trs.hpp @@ -125,10 +125,10 @@ class UpperTrs : public EnableLinOp>, protected: void init_trs_solve_struct(); - void apply_impl(const LinOp *b, LinOp *x) const override; + void apply_impl(const LinOp* b, LinOp* x) const override; - void apply_impl(const LinOp *alpha, const LinOp *b, const LinOp *beta, - LinOp *x) const override; + void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta, + LinOp* x) const override; /** * Generates the analysis structure from the system matrix and the right @@ -140,7 +140,7 @@ class UpperTrs : public EnableLinOp>, : EnableLinOp(std::move(exec)) {} - explicit UpperTrs(const Factory *factory, + explicit UpperTrs(const Factory* factory, std::shared_ptr system_matrix) : EnableLinOp(factory->get_executor(), gko::transpose(system_matrix->get_size())), diff --git a/include/ginkgo/core/stop/combined.hpp b/include/ginkgo/core/stop/combined.hpp index d993e0ffbc7..677d1abdf37 100644 --- a/include/ginkgo/core/stop/combined.hpp +++ b/include/ginkgo/core/stop/combined.hpp @@ -75,18 +75,18 @@ class Combined : public EnablePolymorphicObject { protected: bool check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, bool *one_changed, - const Updater &) override; + Array* stop_status, bool* 
one_changed, + const Updater&) override; explicit Combined(std::shared_ptr exec) : EnablePolymorphicObject(std::move(exec)) {} - explicit Combined(const Factory *factory, const CriterionArgs &args) + explicit Combined(const Factory* factory, const CriterionArgs& args) : EnablePolymorphicObject(factory->get_executor()), parameters_{factory->get_parameters()} { - for (const auto &f : parameters_.criteria) { + for (const auto& f : parameters_.criteria) { // Ignore the nullptr from the list if (f != nullptr) { criteria_.push_back(f->generate(args)); @@ -120,7 +120,7 @@ class Combined : public EnablePolymorphicObject { * @ingroup stop */ template -std::shared_ptr combine(FactoryContainer &&factories) +std::shared_ptr combine(FactoryContainer&& factories) { switch (factories.size()) { case 0: diff --git a/include/ginkgo/core/stop/criterion.hpp b/include/ginkgo/core/stop/criterion.hpp index 5a3740a5c92..eae323d6bbb 100644 --- a/include/ginkgo/core/stop/criterion.hpp +++ b/include/ginkgo/core/stop/criterion.hpp @@ -87,17 +87,17 @@ class Criterion : public EnableAbstractPolymorphicObject { * This is to enforce the use of argument passing and calling check at * the same time. 
*/ - Updater(const Updater &) = delete; - Updater(Updater &&) = delete; - Updater &operator=(const Updater &) = delete; - Updater &operator=(Updater &&) = delete; + Updater(const Updater&) = delete; + Updater(Updater&&) = delete; + Updater& operator=(const Updater&) = delete; + Updater& operator=(Updater&&) = delete; /** * Calls the parent Criterion object's check method * @copydoc Criterion::check(uint8, bool, Array, bool) */ bool check(uint8 stopping_id, bool set_finalized, - Array *stop_status, bool *one_changed) const + Array* stop_status, bool* one_changed) const { auto converged = parent_->check(stopping_id, set_finalized, stop_status, one_changed, *this); @@ -108,7 +108,7 @@ class Criterion : public EnableAbstractPolymorphicObject { * Helper macro to add parameters and setters to updater */ #define GKO_UPDATER_REGISTER_PARAMETER(_type, _name) \ - const Updater &_name(_type const &value) const \ + const Updater& _name(_type const& value) const \ { \ _name##_ = value; \ return *this; \ @@ -116,18 +116,17 @@ class Criterion : public EnableAbstractPolymorphicObject { mutable _type _name##_ {} GKO_UPDATER_REGISTER_PARAMETER(size_type, num_iterations); - GKO_UPDATER_REGISTER_PARAMETER(const LinOp *, residual); - GKO_UPDATER_REGISTER_PARAMETER(const LinOp *, residual_norm); - GKO_UPDATER_REGISTER_PARAMETER(const LinOp *, - implicit_sq_residual_norm); - GKO_UPDATER_REGISTER_PARAMETER(const LinOp *, solution); + GKO_UPDATER_REGISTER_PARAMETER(const LinOp*, residual); + GKO_UPDATER_REGISTER_PARAMETER(const LinOp*, residual_norm); + GKO_UPDATER_REGISTER_PARAMETER(const LinOp*, implicit_sq_residual_norm); + GKO_UPDATER_REGISTER_PARAMETER(const LinOp*, solution); #undef GKO_UPDATER_REGISTER_PARAMETER private: - Updater(Criterion *parent) : parent_{parent} {} + Updater(Criterion* parent) : parent_{parent} {} - Criterion *parent_; + Criterion* parent_; }; /** @@ -151,8 +150,8 @@ class Criterion : public EnableAbstractPolymorphicObject { * @returns whether convergence was 
completely reached */ bool check(uint8 stopping_id, bool set_finalized, - Array *stop_status, bool *one_changed, - const Updater &updater) + Array* stop_status, bool* one_changed, + const Updater& updater) { this->template log( this, updater.num_iterations_, updater.residual_, @@ -186,8 +185,8 @@ class Criterion : public EnableAbstractPolymorphicObject { * @returns whether convergence was completely reached */ virtual bool check_impl(uint8 stopping_id, bool set_finalized, - Array *stop_status, - bool *one_changed, const Updater &updater) = 0; + Array* stop_status, + bool* one_changed, const Updater& updater) = 0; /** * This is a helper function which properly sets all elements of the @@ -200,7 +199,7 @@ class Criterion : public EnableAbstractPolymorphicObject { * @param stop_status status of the stopping criterion */ void set_all_statuses(uint8 stopping_id, bool set_finalized, - Array *stop_status); + Array* stop_status); explicit Criterion(std::shared_ptr exec) : EnableAbstractPolymorphicObject(exec) @@ -221,13 +220,13 @@ class Criterion : public EnableAbstractPolymorphicObject { struct CriterionArgs { std::shared_ptr system_matrix; std::shared_ptr b; - const LinOp *x; - const LinOp *initial_residual; + const LinOp* x; + const LinOp* initial_residual; CriterionArgs(std::shared_ptr system_matrix, - std::shared_ptr b, const LinOp *x, - const LinOp *initial_residual = nullptr) + std::shared_ptr b, const LinOp* x, + const LinOp* initial_residual = nullptr) : system_matrix{system_matrix}, b{b}, x{x}, @@ -291,7 +290,7 @@ using EnableDefaultCriterionFactory = #define GKO_ENABLE_CRITERION_FACTORY(_criterion, _parameters_name, \ _factory_name) \ public: \ - const _parameters_name##_type &get_##_parameters_name() const \ + const _parameters_name##_type& get_##_parameters_name() const \ { \ return _parameters_name##_; \ } \ @@ -309,7 +308,7 @@ public: \ std::move(exec)) \ {} \ explicit _factory_name(std::shared_ptr exec, \ - const _parameters_name##_type ¶meters) \ + const 
_parameters_name##_type& parameters) \ : ::gko::stop::EnableDefaultCriterionFactory< \ _factory_name, _criterion, _parameters_name##_type>( \ std::move(exec), parameters) \ diff --git a/include/ginkgo/core/stop/iteration.hpp b/include/ginkgo/core/stop/iteration.hpp index 1efc1c5301e..f54f5876905 100644 --- a/include/ginkgo/core/stop/iteration.hpp +++ b/include/ginkgo/core/stop/iteration.hpp @@ -65,14 +65,14 @@ class Iteration : public EnablePolymorphicObject { protected: bool check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, bool *one_changed, - const Updater &updater) override; + Array* stop_status, bool* one_changed, + const Updater& updater) override; explicit Iteration(std::shared_ptr exec) : EnablePolymorphicObject(std::move(exec)) {} - explicit Iteration(const Factory *factory, const CriterionArgs &args) + explicit Iteration(const Factory* factory, const CriterionArgs& args) : EnablePolymorphicObject( factory->get_executor()), parameters_{factory->get_parameters()} diff --git a/include/ginkgo/core/stop/residual_norm.hpp b/include/ginkgo/core/stop/residual_norm.hpp index 8dbbdc88544..68afd0154c1 100644 --- a/include/ginkgo/core/stop/residual_norm.hpp +++ b/include/ginkgo/core/stop/residual_norm.hpp @@ -85,8 +85,8 @@ class ResidualNormBase using NormVector = matrix::Dense>; using Vector = matrix::Dense; bool check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, bool *one_changed, - const Criterion::Updater &updater) override; + Array* stop_status, bool* one_changed, + const Criterion::Updater& updater) override; explicit ResidualNormBase(std::shared_ptr exec) : EnablePolymorphicObject(exec), @@ -94,7 +94,7 @@ class ResidualNormBase {} explicit ResidualNormBase(std::shared_ptr exec, - const CriterionArgs &args, + const CriterionArgs& args, remove_complex reduction_factor, mode baseline) : EnablePolymorphicObject(exec), @@ -133,8 +133,7 @@ class ResidualNormBase } else { this->starting_tau_ = NormVector::create( exec, dim<2>{1, 
args.initial_residual->get_size()[1]}); - if (dynamic_cast( - args.initial_residual)) { + if (dynamic_cast(args.initial_residual)) { auto dense_r = as(args.initial_residual); dense_r->compute_norm2(this->starting_tau_.get()); } else { @@ -150,7 +149,7 @@ class ResidualNormBase } this->starting_tau_ = NormVector::create(exec, dim<2>{1, args.b->get_size()[1]}); - if (dynamic_cast(args.b.get())) { + if (dynamic_cast(args.b.get())) { auto dense_rhs = as(args.b); dense_rhs->compute_norm2(this->starting_tau_.get()); } else { @@ -239,7 +238,7 @@ class ResidualNorm : public ResidualNormBase { : ResidualNormBase(exec) {} - explicit ResidualNorm(const Factory *factory, const CriterionArgs &args) + explicit ResidualNorm(const Factory* factory, const CriterionArgs& args) : ResidualNormBase( factory->get_executor(), args, factory->get_parameters().reduction_factor, @@ -295,15 +294,15 @@ class ImplicitResidualNorm : public ResidualNormBase { // check_impl needs to be overwritten again since we focus on the implicit // residual here bool check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, bool *one_changed, - const Criterion::Updater &updater) override; + Array* stop_status, bool* one_changed, + const Criterion::Updater& updater) override; explicit ImplicitResidualNorm(std::shared_ptr exec) : ResidualNormBase(exec) {} - explicit ImplicitResidualNorm(const Factory *factory, - const CriterionArgs &args) + explicit ImplicitResidualNorm(const Factory* factory, + const CriterionArgs& args) : ResidualNormBase( factory->get_executor(), args, factory->get_parameters().reduction_factor, @@ -356,8 +355,8 @@ class ResidualNormReduction : public ResidualNormBase { : ResidualNormBase(exec) {} - explicit ResidualNormReduction(const Factory *factory, - const CriterionArgs &args) + explicit ResidualNormReduction(const Factory* factory, + const CriterionArgs& args) : ResidualNormBase( factory->get_executor(), args, factory->get_parameters().reduction_factor, @@ -409,8 +408,8 @@ 
class RelativeResidualNorm : public ResidualNormBase { : ResidualNormBase(exec) {} - explicit RelativeResidualNorm(const Factory *factory, - const CriterionArgs &args) + explicit RelativeResidualNorm(const Factory* factory, + const CriterionArgs& args) : ResidualNormBase(factory->get_executor(), args, factory->get_parameters().tolerance, mode::rhs_norm), @@ -459,8 +458,8 @@ class AbsoluteResidualNorm : public ResidualNormBase { : ResidualNormBase(exec) {} - explicit AbsoluteResidualNorm(const Factory *factory, - const CriterionArgs &args) + explicit AbsoluteResidualNorm(const Factory* factory, + const CriterionArgs& args) : ResidualNormBase(factory->get_executor(), args, factory->get_parameters().tolerance, mode::absolute), diff --git a/include/ginkgo/core/stop/stopping_status.hpp b/include/ginkgo/core/stop/stopping_status.hpp index 665ab90c331..275e540a8f6 100644 --- a/include/ginkgo/core/stop/stopping_status.hpp +++ b/include/ginkgo/core/stop/stopping_status.hpp @@ -48,9 +48,9 @@ namespace gko { */ class stopping_status { friend GKO_ATTRIBUTES GKO_INLINE bool operator==( - const stopping_status &x, const stopping_status &y) noexcept; + const stopping_status& x, const stopping_status& y) noexcept; friend GKO_ATTRIBUTES GKO_INLINE bool operator!=( - const stopping_status &x, const stopping_status &y) noexcept; + const stopping_status& x, const stopping_status& y) noexcept; public: /** @@ -159,8 +159,8 @@ class stopping_status { * @return true if and only if both `x` and `y` have the same mask and converged * and finalized state */ -GKO_ATTRIBUTES GKO_INLINE bool operator==(const stopping_status &x, - const stopping_status &y) noexcept +GKO_ATTRIBUTES GKO_INLINE bool operator==(const stopping_status& x, + const stopping_status& y) noexcept { return x.data_ == y.data_; } @@ -174,8 +174,8 @@ GKO_ATTRIBUTES GKO_INLINE bool operator==(const stopping_status &x, * * @return true if and only if `!(x == y)` */ -GKO_ATTRIBUTES GKO_INLINE bool operator!=(const stopping_status 
&x, - const stopping_status &y) noexcept +GKO_ATTRIBUTES GKO_INLINE bool operator!=(const stopping_status& x, + const stopping_status& y) noexcept { return x.data_ != y.data_; } diff --git a/include/ginkgo/core/stop/time.hpp b/include/ginkgo/core/stop/time.hpp index bba6bf2b9d4..1bb73bb1d7e 100644 --- a/include/ginkgo/core/stop/time.hpp +++ b/include/ginkgo/core/stop/time.hpp @@ -68,8 +68,8 @@ class Time : public EnablePolymorphicObject { protected: bool check_impl(uint8 stoppingId, bool setFinalized, - Array *stop_status, bool *one_changed, - const Updater &) override; + Array* stop_status, bool* one_changed, + const Updater&) override; explicit Time(std::shared_ptr exec) : EnablePolymorphicObject(std::move(exec)), @@ -77,7 +77,7 @@ class Time : public EnablePolymorphicObject { start_{} {} - explicit Time(const Factory *factory, const CriterionArgs args) + explicit Time(const Factory* factory, const CriterionArgs args) : EnablePolymorphicObject(factory->get_executor()), parameters_{factory->get_parameters()}, time_limit_{std::chrono::duration( diff --git a/omp/base/kernel_launch.hpp b/omp/base/kernel_launch.hpp index ae7a3ab4f94..7df6ff4c313 100644 --- a/omp/base/kernel_launch.hpp +++ b/omp/base/kernel_launch.hpp @@ -43,7 +43,7 @@ namespace omp { template void run_kernel(std::shared_ptr exec, KernelFunction fn, - size_type size, KernelArgs &&... args) + size_type size, KernelArgs&&... args) { #pragma omp parallel for for (size_type i = 0; i < size; i++) { @@ -143,7 +143,7 @@ void run_kernel_impl(std::shared_ptr exec, KernelFunction fn, template void run_kernel(std::shared_ptr exec, KernelFunction fn, - dim<2> size, KernelArgs &&... args) + dim<2> size, KernelArgs&&... 
args) { run_kernel_impl(exec, fn, size, map_to_device(args)...); } diff --git a/omp/base/kernel_launch_solver.hpp b/omp/base/kernel_launch_solver.hpp index b3791dd245c..dd85ba21915 100644 --- a/omp/base/kernel_launch_solver.hpp +++ b/omp/base/kernel_launch_solver.hpp @@ -43,7 +43,7 @@ namespace omp { template typename device_unpack_solver_impl::type>::type -map_to_device_solver(T &¶m, size_type default_stride) +map_to_device_solver(T&& param, size_type default_stride) { return device_unpack_solver_impl::type>:: unpack(to_device_type_impl::map_to_device(param), default_stride); @@ -53,7 +53,7 @@ map_to_device_solver(T &¶m, size_type default_stride) template void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, - KernelArgs &&... args) + KernelArgs&&... args) { run_kernel_impl(exec, fn, size, map_to_device_solver(args, default_stride)...); diff --git a/omp/components/absolute_array.cpp b/omp/components/absolute_array.cpp index 5a1a5df503e..072989d9066 100644 --- a/omp/components/absolute_array.cpp +++ b/omp/components/absolute_array.cpp @@ -41,7 +41,7 @@ namespace components { template void inplace_absolute_array(std::shared_ptr exec, - ValueType *data, size_type n) + ValueType* data, size_type n) { #pragma omp parallel for for (size_type i = 0; i < n; ++i) { @@ -54,8 +54,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); template void outplace_absolute_array(std::shared_ptr exec, - const ValueType *in, size_type n, - remove_complex *out) + const ValueType* in, size_type n, + remove_complex* out) { #pragma omp parallel for for (size_type i = 0; i < n; ++i) { diff --git a/omp/components/atomic.hpp b/omp/components/atomic.hpp index 5488355ac7f..7b4f225199f 100644 --- a/omp/components/atomic.hpp +++ b/omp/components/atomic.hpp @@ -47,20 +47,20 @@ namespace omp { template ()> * = nullptr> -void atomic_add(ValueType &out, ValueType val) + std::enable_if_t()>* = nullptr> +void 
atomic_add(ValueType& out, ValueType val) { #pragma omp atomic out += val; } template ()> * = nullptr> -void atomic_add(ValueType &out, ValueType val) + std::enable_if_t()>* = nullptr> +void atomic_add(ValueType& out, ValueType val) { // The C++ standard explicitly allows casting complex* to double* // [complex.numbers.general] - auto values = reinterpret_cast *>(&out); + auto values = reinterpret_cast*>(&out); #pragma omp atomic values[0] += real(val); #pragma omp atomic diff --git a/omp/components/csr_spgeam.hpp b/omp/components/csr_spgeam.hpp index 0429aed8a85..40ae2e7a950 100644 --- a/omp/components/csr_spgeam.hpp +++ b/omp/components/csr_spgeam.hpp @@ -59,8 +59,8 @@ namespace omp { */ template -void abstract_spgeam(const matrix::Csr *a, - const matrix::Csr *b, +void abstract_spgeam(const matrix::Csr* a, + const matrix::Csr* b, BeginCallback begin_cb, EntryCallback entry_cb, EndCallback end_cb) { diff --git a/omp/components/fill_array.cpp b/omp/components/fill_array.cpp index 5b489af4b8d..03408b151e8 100644 --- a/omp/components/fill_array.cpp +++ b/omp/components/fill_array.cpp @@ -40,7 +40,7 @@ namespace components { template -void fill_array(std::shared_ptr exec, ValueType *array, +void fill_array(std::shared_ptr exec, ValueType* array, size_type n, ValueType val) { #pragma omp parallel for @@ -54,7 +54,7 @@ GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); template void fill_seq_array(std::shared_ptr exec, - ValueType *array, size_type n) + ValueType* array, size_type n) { #pragma omp parallel for for (size_type i = 0; i < n; ++i) { diff --git a/omp/components/format_conversion.hpp b/omp/components/format_conversion.hpp index d634541c6f3..8f06b3b53a2 100644 --- a/omp/components/format_conversion.hpp +++ b/omp/components/format_conversion.hpp @@ -57,9 +57,9 @@ namespace omp { * matrix. 
*/ template -inline void convert_unsorted_idxs_to_ptrs(const IndexType *idxs, +inline void convert_unsorted_idxs_to_ptrs(const IndexType* idxs, size_type num_nonzeros, - IndexType *ptrs, size_type length) + IndexType* ptrs, size_type length) { #pragma omp parallel for schedule(static, \ ceildiv(length, omp_get_max_threads())) @@ -85,8 +85,8 @@ inline void convert_unsorted_idxs_to_ptrs(const IndexType *idxs, * pointers when converting a coo matrix to a csr matrix. */ template -inline void convert_sorted_idxs_to_ptrs(const IndexType *idxs, - size_type num_nonzeros, IndexType *ptrs, +inline void convert_sorted_idxs_to_ptrs(const IndexType* idxs, + size_type num_nonzeros, IndexType* ptrs, size_type num_rows) { ptrs[0] = 0; @@ -112,8 +112,8 @@ inline void convert_sorted_idxs_to_ptrs(const IndexType *idxs, template -inline void convert_ptrs_to_idxs(const IndexType *ptrs, size_type num_rows, - IndexType *idxs) +inline void convert_ptrs_to_idxs(const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { #pragma omp parallel for for (size_type row = 0; row < num_rows; ++row) { diff --git a/omp/components/matrix_operations.hpp b/omp/components/matrix_operations.hpp index 86c1c152ff8..20e8371eba5 100644 --- a/omp/components/matrix_operations.hpp +++ b/omp/components/matrix_operations.hpp @@ -53,7 +53,7 @@ namespace omp { template remove_complex compute_inf_norm(size_type num_rows, size_type num_cols, - const ValueType *matrix, + const ValueType* matrix, size_type stride) { auto result = zero>(); diff --git a/omp/components/omp_mutex.hpp b/omp/components/omp_mutex.hpp index 10b35c50d75..ca7b8cf8403 100644 --- a/omp/components/omp_mutex.hpp +++ b/omp/components/omp_mutex.hpp @@ -51,10 +51,10 @@ struct omp_mutex { omp_mutex() { omp_init_lock(&lock_); } ~omp_mutex() { omp_destroy_lock(&lock_); } - omp_mutex(const omp_mutex &) = delete; - omp_mutex(omp_mutex &&) = delete; - omp_mutex &operator=(const omp_mutex &) = delete; - omp_mutex &operator=(omp_mutex &&) = delete; + 
omp_mutex(const omp_mutex&) = delete; + omp_mutex(omp_mutex&&) = delete; + omp_mutex& operator=(const omp_mutex&) = delete; + omp_mutex& operator=(omp_mutex&&) = delete; void lock() { omp_set_lock(&lock_); } diff --git a/omp/components/prefix_sum.cpp b/omp/components/prefix_sum.cpp index 6ac9d4e9291..88e1d048844 100644 --- a/omp/components/prefix_sum.cpp +++ b/omp/components/prefix_sum.cpp @@ -53,7 +53,7 @@ namespace components { */ template void prefix_sum(std::shared_ptr exec, - IndexType *const counts, const size_type num_entries) + IndexType* const counts, const size_type num_entries) { // the operation only makes sense for arrays of size at least 2 if (num_entries < 2) { diff --git a/omp/components/sort_small.hpp b/omp/components/sort_small.hpp index 6cb5c905837..21c368a370b 100644 --- a/omp/components/sort_small.hpp +++ b/omp/components/sort_small.hpp @@ -57,7 +57,7 @@ namespace detail { #ifdef _MSC_VER template -inline void comparator(TargetType *arr, size_type x, size_type y, Functor comp) +inline void comparator(TargetType* arr, size_type x, size_type y, Functor comp) { const auto should_swap = !comp(arr[x], arr[y]); const auto tmp = arr[x]; @@ -66,7 +66,7 @@ inline void comparator(TargetType *arr, size_type x, size_type y, Functor comp) } #else template -inline void comparator(TargetType *arr, size_type x, size_type y, Functor comp) +inline void comparator(TargetType* arr, size_type x, size_type y, Functor comp) { const auto tx = arr[x]; const auto ty = arr[y]; @@ -87,7 +87,7 @@ inline void comparator(TargetType *arr, size_type x, size_type y, Functor comp) * For larger sizes delegates to std::sort. 
*/ template -void sort_small(TargetType *arr, size_type n, Functor comp) +void sort_small(TargetType* arr, size_type n, Functor comp) { const auto sort2 = [&](size_type x, size_type y) { detail::comparator(arr, x, y, comp); diff --git a/omp/factorization/factorization_kernels.cpp b/omp/factorization/factorization_kernels.cpp index 09b2d5fd24a..f7242734182 100644 --- a/omp/factorization/factorization_kernels.cpp +++ b/omp/factorization/factorization_kernels.cpp @@ -85,8 +85,8 @@ struct find_helper { template void find_missing_diagonal_elements( - const matrix::Csr *mtx, - IndexType *elements_to_add_per_row, bool *changes_required) + const matrix::Csr* mtx, + IndexType* elements_to_add_per_row, bool* changes_required) { auto num_rows = static_cast(mtx->get_size()[0]); auto num_cols = static_cast(mtx->get_size()[1]); @@ -99,8 +99,8 @@ void find_missing_diagonal_elements( elements_to_add_per_row[row] = 0; continue; } - const auto *start_cols = col_idxs + row_ptrs[row]; - const auto *end_cols = col_idxs + row_ptrs[row + 1]; + const auto* start_cols = col_idxs + row_ptrs[row]; + const auto* end_cols = col_idxs + row_ptrs[row + 1]; if (detail::find_helper::find(start_cols, end_cols, row)) { elements_to_add_per_row[row] = 0; } else { @@ -113,10 +113,10 @@ void find_missing_diagonal_elements( template -void add_missing_diagonal_elements(const matrix::Csr *mtx, - ValueType *new_values, - IndexType *new_col_idxs, - const IndexType *row_ptrs_addition) +void add_missing_diagonal_elements(const matrix::Csr* mtx, + ValueType* new_values, + IndexType* new_col_idxs, + const IndexType* row_ptrs_addition) { const auto num_rows = static_cast(mtx->get_size()[0]); const auto old_values = mtx->get_const_values(); @@ -168,7 +168,7 @@ void add_missing_diagonal_elements(const matrix::Csr *mtx, template void add_diagonal_elements(std::shared_ptr exec, - matrix::Csr *mtx, + matrix::Csr* mtx, bool is_sorted) { auto mtx_size = mtx->get_size(); @@ -216,8 +216,8 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l_u( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs, IndexType *u_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs, IndexType* u_row_ptrs) { auto num_rows = system_matrix->get_size()[0]; auto row_ptrs = system_matrix->get_const_row_ptrs(); @@ -250,9 +250,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l_u(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, - matrix::Csr *csr_u) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, + matrix::Csr* csr_u) { const auto row_ptrs = system_matrix->get_const_row_ptrs(); const auto col_idxs = system_matrix->get_const_col_idxs(); @@ -306,8 +306,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs) { auto num_rows = system_matrix->get_size()[0]; auto row_ptrs = system_matrix->get_const_row_ptrs(); @@ -336,8 +336,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, bool diag_sqrt) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, bool diag_sqrt) { const auto row_ptrs = system_matrix->get_const_row_ptrs(); const auto col_idxs = system_matrix->get_const_col_idxs(); diff --git a/omp/factorization/ic_kernels.cpp b/omp/factorization/ic_kernels.cpp index b20f9c1da2a..c9fa296cca7 100644 --- a/omp/factorization/ic_kernels.cpp +++ b/omp/factorization/ic_kernels.cpp @@ -46,7 +46,7 @@ namespace ic_factorization { template void compute(std::shared_ptr exec, - matrix::Csr *m) GKO_NOT_IMPLEMENTED; + matrix::Csr* m) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_IC_COMPUTE_KERNEL); diff --git a/omp/factorization/ilu_kernels.cpp 
b/omp/factorization/ilu_kernels.cpp index 976c8129395..a4175693afe 100644 --- a/omp/factorization/ilu_kernels.cpp +++ b/omp/factorization/ilu_kernels.cpp @@ -46,7 +46,7 @@ namespace ilu_factorization { template void compute_lu(std::shared_ptr exec, - matrix::Csr *m) GKO_NOT_IMPLEMENTED; + matrix::Csr* m) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ILU_COMPUTE_LU_KERNEL); diff --git a/omp/factorization/par_ic_kernels.cpp b/omp/factorization/par_ic_kernels.cpp index 67205efbaf7..eef2ea04301 100644 --- a/omp/factorization/par_ic_kernels.cpp +++ b/omp/factorization/par_ic_kernels.cpp @@ -54,7 +54,7 @@ namespace par_ic_factorization { template void init_factor(std::shared_ptr exec, - matrix::Csr *l) + matrix::Csr* l) { auto num_rows = l->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -79,8 +79,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, size_type iterations, - const matrix::Coo *a_lower, - matrix::Csr *l) + const matrix::Coo* a_lower, + matrix::Csr* l) { auto num_rows = a_lower->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); diff --git a/omp/factorization/par_ict_kernels.cpp b/omp/factorization/par_ict_kernels.cpp index fed5243aab7..827c2c5ecd5 100644 --- a/omp/factorization/par_ict_kernels.cpp +++ b/omp/factorization/par_ict_kernels.cpp @@ -64,9 +64,9 @@ namespace par_ict_factorization { template void compute_factor(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo*) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -128,10 +128,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_candidates(std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) { 
auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -142,7 +142,7 @@ void add_candidates(std::shared_ptr exec, // count nnz abstract_spgeam( a, llh, [](IndexType) { return IndexType{}; }, - [](IndexType row, IndexType col, ValueType, ValueType, IndexType &nnz) { + [](IndexType row, IndexType col, ValueType, ValueType, IndexType& nnz) { nnz += col <= row; }, [&](IndexType row, IndexType nnz) { l_new_row_ptrs[row] = nnz; }); @@ -173,7 +173,7 @@ void add_candidates(std::shared_ptr exec, return state; }, [&](IndexType row, IndexType col, ValueType a_val, ValueType llh_val, - row_state &state) { + row_state& state) { auto r_val = a_val - llh_val; // load matching entry of L auto l_col = checked_load(l_col_idxs, state.l_old_begin, diff --git a/omp/factorization/par_ilu_kernels.cpp b/omp/factorization/par_ilu_kernels.cpp index bbff6d84d7d..761d59cffbf 100644 --- a/omp/factorization/par_ilu_kernels.cpp +++ b/omp/factorization/par_ilu_kernels.cpp @@ -55,9 +55,9 @@ namespace par_ilu_factorization { template void compute_l_u_factors(std::shared_ptr exec, size_type iterations, - const matrix::Coo *system_matrix, - matrix::Csr *l_factor, - matrix::Csr *u_factor) + const matrix::Coo* system_matrix, + matrix::Csr* l_factor, + matrix::Csr* u_factor) { // If `iterations` is set to `Auto`, we do 3 fix-point sweeps as // experiements indicate this works well for many problems. 
diff --git a/omp/factorization/par_ilut_kernels.cpp b/omp/factorization/par_ilut_kernels.cpp index bd1dd027173..8f8b3317338 100644 --- a/omp/factorization/par_ilut_kernels.cpp +++ b/omp/factorization/par_ilut_kernels.cpp @@ -68,10 +68,10 @@ namespace par_ilut_factorization { template void threshold_select(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - Array> &, - remove_complex &threshold) + const matrix::Csr* m, + IndexType rank, Array& tmp, + Array>&, + remove_complex& threshold) { auto values = m->get_const_values(); IndexType size = m->get_num_stored_elements(); @@ -98,9 +98,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void abstract_filter(std::shared_ptr exec, - const matrix::Csr *m, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, + const matrix::Csr* m, + matrix::Csr* m_out, + matrix::Coo* m_out_coo, Predicate pred) { auto num_rows = m->get_size()[0]; @@ -131,7 +131,7 @@ void abstract_filter(std::shared_ptr exec, builder.get_value_array().resize_and_reset(new_nnz); auto new_col_idxs = m_out->get_col_idxs(); auto new_vals = m_out->get_values(); - IndexType *new_row_idxs{}; + IndexType* new_row_idxs{}; if (m_out_coo) { matrix::CooBuilder coo_builder{m_out_coo}; coo_builder.get_row_idx_array().resize_and_reset(new_nnz); @@ -163,10 +163,10 @@ void abstract_filter(std::shared_ptr exec, template void threshold_filter(std::shared_ptr exec, - const matrix::Csr *m, + const matrix::Csr* m, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, bool) + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool) { auto col_idxs = m->get_const_col_idxs(); auto vals = m->get_const_values(); @@ -186,11 +186,11 @@ constexpr auto sample_size = bucket_count * sampleselect_oversampling; template void threshold_filter_approx(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - remove_complex &threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo) + const matrix::Csr* m, + IndexType rank, 
Array& tmp, + remove_complex& threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) { auto vals = m->get_const_values(); auto col_idxs = m->get_const_col_idxs(); @@ -203,7 +203,7 @@ void threshold_filter_approx(std::shared_ptr exec, sizeof(ValueType)); tmp.resize_and_reset(storage_size); // pick and sort sample - auto sample = reinterpret_cast(tmp.get_data()); + auto sample = reinterpret_cast(tmp.get_data()); // assuming rounding towards zero auto stride = double(size) / sample_size; for (IndexType i = 0; i < sample_size; ++i) { @@ -216,7 +216,7 @@ void threshold_filter_approx(std::shared_ptr exec, sample[i] = sample[(i + 1) * sampleselect_oversampling]; } // count elements per bucket - auto total_histogram = reinterpret_cast(sample + bucket_count); + auto total_histogram = reinterpret_cast(sample + bucket_count); for (IndexType bucket = 0; bucket < bucket_count; ++bucket) { total_histogram[bucket] = 0; } @@ -266,12 +266,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_l_u_factors(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *, - matrix::Csr *u, - const matrix::Coo *, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo*, + matrix::Csr* u, + const matrix::Coo*, + matrix::Csr* u_csc) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -350,12 +350,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_candidates(std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, + const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -371,7 +371,7 @@ void add_candidates(std::shared_ptr exec, abstract_spgeam( a, lu, [](IndexType) { return std::pair{}; }, 
[](IndexType row, IndexType col, ValueType, ValueType, - std::pair &nnzs) { + std::pair& nnzs) { nnzs.first += col <= row; nnzs.second += col >= row; }, @@ -421,7 +421,7 @@ void add_candidates(std::shared_ptr exec, return state; }, [&](IndexType row, IndexType col, ValueType a_val, ValueType lu_val, - row_state &state) { + row_state& state) { auto r_val = a_val - lu_val; // load matching entry of L + U auto lpu_col = state.finished_l diff --git a/omp/matrix/coo_kernels.cpp b/omp/matrix/coo_kernels.cpp index 303415405b3..81e9a90821e 100644 --- a/omp/matrix/coo_kernels.cpp +++ b/omp/matrix/coo_kernels.cpp @@ -65,8 +65,8 @@ namespace coo { template void spmv(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -77,11 +77,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -93,8 +93,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { const auto coo_val = a->get_const_values(); const auto coo_col = a->get_const_col_idxs(); @@ -145,10 +145,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const 
matrix::Dense* b, + matrix::Dense* c) { const auto coo_val = a->get_const_values(); const auto coo_col = a->get_const_col_idxs(); @@ -203,8 +203,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_row_idxs_to_ptrs(std::shared_ptr exec, - const IndexType *idxs, size_type num_nonzeros, - IndexType *ptrs, size_type num_rows) + const IndexType* idxs, size_type num_nonzeros, + IndexType* ptrs, size_type num_rows) { convert_sorted_idxs_to_ptrs(idxs, num_nonzeros, ptrs, num_rows); } @@ -212,8 +212,8 @@ void convert_row_idxs_to_ptrs(std::shared_ptr exec, template void convert_to_csr(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Csr *result) + const matrix::Coo* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -231,8 +231,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Dense *result) + const matrix::Coo* source, + matrix::Dense* result) { auto coo_val = source->get_const_values(); auto coo_col = source->get_const_col_idxs(); diff --git a/omp/matrix/csr_kernels.cpp b/omp/matrix/csr_kernels.cpp index 8b0e9f5a81d..785a934c7c6 100644 --- a/omp/matrix/csr_kernels.cpp +++ b/omp/matrix/csr_kernels.cpp @@ -72,8 +72,8 @@ namespace csr { template void spmv(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Csr* a, + const matrix::Dense* b, matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -100,11 +100,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { auto row_ptrs = 
a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -205,7 +205,7 @@ struct val_heap_element { * val_heap_element */ template -void sift_down(HeapElement *heap, typename HeapElement::index_type idx, +void sift_down(HeapElement* heap, typename HeapElement::index_type idx, typename HeapElement::index_type size) { auto curcol = heap[idx].col; @@ -260,9 +260,9 @@ void sift_down(HeapElement *heap, typename HeapElement::index_type idx, template auto spgemm_multiway_merge(size_type row, - const typename HeapElement::matrix_type *a, - const typename HeapElement::matrix_type *b, - HeapElement *heap, InitCallback init_cb, + const typename HeapElement::matrix_type* a, + const typename HeapElement::matrix_type* b, + HeapElement* heap, InitCallback init_cb, StepCallback step_cb, ColCallback col_cb) -> decltype(init_cb(0)) { @@ -296,8 +296,8 @@ auto spgemm_multiway_merge(size_type row, for (auto i = (a_size - 2) / 2; i >= 0; --i) { sift_down(heap + a_begin, i, a_size); } - auto &top = heap[a_begin]; - auto &bot = heap[a_end - 1]; + auto& top = heap[a_begin]; + auto& bot = heap[a_end - 1]; auto col = top.col; while (top.col != sentinel) { @@ -325,9 +325,9 @@ auto spgemm_multiway_merge(size_type row, template void spgemm(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Csr* a, + const matrix::Csr* b, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; auto c_row_ptrs = c->get_row_ptrs(); @@ -342,8 +342,8 @@ void spgemm(std::shared_ptr exec, for (size_type a_row = 0; a_row < num_rows; ++a_row) { c_row_ptrs[a_row] = spgemm_multiway_merge( a_row, a, b, col_heap, [](size_type) { return IndexType{}; }, - [](ValueType, IndexType, IndexType &) {}, - [](IndexType, IndexType &nnz) { nnz++; }); + [](ValueType, IndexType, IndexType&) {}, + [](IndexType, IndexType& nnz) { nnz++; }); } col_heap_array.clear(); @@ -359,8 +359,8 @@ void spgemm(std::shared_ptr exec, // second sweep: accumulate non-zeros auto new_nnz = 
c_row_ptrs[num_rows]; matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); @@ -374,8 +374,8 @@ void spgemm(std::shared_ptr exec, return std::make_pair(zero(), c_row_ptrs[row]); }, [](ValueType val, IndexType, - std::pair &state) { state.first += val; }, - [&](IndexType col, std::pair &state) { + std::pair& state) { state.first += val; }, + [&](IndexType col, std::pair& state) { c_col_idxs[state.second] = col; c_vals[state.second] = state.first; state.first = zero(); @@ -389,12 +389,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Csr *b, - const matrix::Dense *beta, - const matrix::Csr *d, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Csr* b, + const matrix::Dense* beta, + const matrix::Csr* d, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; auto valpha = alpha->at(0, 0); @@ -412,7 +412,7 @@ void advanced_spgemm(std::shared_ptr exec, auto heap = heap_array.get_data(); auto col_heap = - reinterpret_cast *>(heap); + reinterpret_cast*>(heap); // first sweep: count nnz for each row #pragma omp parallel for @@ -422,8 +422,8 @@ void advanced_spgemm(std::shared_ptr exec, auto d_col = checked_load(d_cols, d_nz, d_end, sentinel); c_row_ptrs[a_row] = spgemm_multiway_merge( a_row, a, b, col_heap, [](size_type row) { return IndexType{}; }, - [](ValueType, IndexType, IndexType &) {}, - [&](IndexType col, IndexType &nnz) { + [](ValueType, IndexType, IndexType&) {}, + [&](IndexType col, IndexType& nnz) { // skip smaller elements from d while (d_col <= 
col) { d_nz++; @@ -442,8 +442,8 @@ void advanced_spgemm(std::shared_ptr exec, // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); @@ -462,10 +462,10 @@ void advanced_spgemm(std::shared_ptr exec, return std::make_pair(zero(), c_row_ptrs[row]); }, [](ValueType val, IndexType, - std::pair &state) { + std::pair& state) { state.first += val; }, - [&](IndexType col, std::pair &state) { + [&](IndexType col, std::pair& state) { // handle smaller elements from d ValueType part_d_val{}; while (d_col <= col) { @@ -506,11 +506,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *beta, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* beta, + const matrix::Csr* b, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; auto valpha = alpha->at(0, 0); @@ -521,7 +521,7 @@ void spgeam(std::shared_ptr exec, abstract_spgeam( a, b, [](IndexType) { return IndexType{}; }, - [](IndexType, IndexType, ValueType, ValueType, IndexType &nnz) { + [](IndexType, IndexType, ValueType, ValueType, IndexType& nnz) { ++nnz; }, [&](IndexType row, IndexType nnz) { c_row_ptrs[row] = nnz; }); @@ -532,8 +532,8 @@ void spgeam(std::shared_ptr exec, // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + 
auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); @@ -542,7 +542,7 @@ void spgeam(std::shared_ptr exec, abstract_spgeam( a, b, [&](IndexType row) { return c_row_ptrs[row]; }, [&](IndexType, IndexType col, ValueType a_val, ValueType b_val, - IndexType &nz) { + IndexType& nz) { c_vals[nz] = valpha * a_val + vbeta * b_val; c_col_idxs[nz] = col; ++nz; @@ -555,8 +555,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) + const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { convert_ptrs_to_idxs(ptrs, num_rows, idxs); } @@ -564,8 +564,8 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, template void convert_to_coo(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Coo *result) + const matrix::Csr* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; @@ -581,8 +581,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Dense *result) + const matrix::Csr* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -608,8 +608,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Sellp *result) + const matrix::Csr* source, + matrix::Sellp* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -618,8 +618,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Ell *result) + const matrix::Csr* source, + matrix::Ell* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -627,10 +627,10 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -inline void convert_csr_to_csc(size_type num_rows, const IndexType *row_ptrs, - const IndexType *col_idxs, - const ValueType *csr_vals, IndexType *row_idxs, - IndexType *col_ptrs, ValueType *csc_vals, +inline void convert_csr_to_csc(size_type num_rows, const IndexType* row_ptrs, + const IndexType* col_idxs, + const ValueType* csr_vals, IndexType* row_idxs, + IndexType* col_ptrs, ValueType* csc_vals, UnaryOperator op) { for (size_type row = 0; row < num_rows; ++row) { @@ -645,8 +645,8 @@ inline void convert_csr_to_csc(size_type num_rows, const IndexType *row_ptrs, template void transpose_and_transform(std::shared_ptr exec, - matrix::Csr *trans, - const matrix::Csr *orig, + matrix::Csr* trans, + const matrix::Csr* orig, UnaryOperator op) { auto trans_row_ptrs = trans->get_row_ptrs(); @@ -671,8 +671,8 @@ void transpose_and_transform(std::shared_ptr exec, template void transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { transpose_and_transform(exec, trans, orig, [](const ValueType x) { return x; }); @@ -683,8 +683,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { transpose_and_transform(exec, trans, orig, [](const ValueType x) { return conj(x); }); @@ -696,8 +696,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result, size_type stride_factor, + const matrix::Csr* source, + size_type* result, size_type stride_factor, size_type slice_size) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -706,8 +706,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Csr 
*source, - size_type *result) GKO_NOT_IMPLEMENTED; + const matrix::Csr* source, + size_type* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_CSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -715,8 +715,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Hybrid *result) + const matrix::Csr* source, + matrix::Hybrid* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -791,9 +791,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -829,9 +829,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void row_permute(std::shared_ptr exec, const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) +void row_permute(std::shared_ptr exec, const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto orig_row_ptrs = orig->get_const_row_ptrs(); auto orig_col_idxs = orig->get_const_col_idxs(); @@ -868,9 +868,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto orig_row_ptrs = orig->get_const_row_ptrs(); auto orig_col_idxs = orig->get_const_col_idxs(); @@ -907,8 +907,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Csr *source, - Array *result) + const matrix::Csr* source, + Array* result) { const auto row_ptrs = source->get_const_row_ptrs(); auto row_nnz_val = 
result->get_data(); @@ -925,7 +925,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::Csr *to_sort) + matrix::Csr* to_sort) { auto values = to_sort->get_values(); auto row_ptrs = to_sort->get_row_ptrs(); @@ -948,7 +948,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr *to_check, bool *is_sorted) + const matrix::Csr* to_check, bool* is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); @@ -975,8 +975,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Diagonal *diag) + const matrix::Csr* orig, + matrix::Diagonal* diag) { const auto row_ptrs = orig->get_const_row_ptrs(); const auto col_idxs = orig->get_const_col_idxs(); diff --git a/omp/matrix/dense_kernels.cpp b/omp/matrix/dense_kernels.cpp index fcbb5c58abd..c0e4ca75ae3 100644 --- a/omp/matrix/dense_kernels.cpp +++ b/omp/matrix/dense_kernels.cpp @@ -67,9 +67,9 @@ namespace dense { template void simple_apply(std::shared_ptr exec, - const matrix::Dense *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* a, + const matrix::Dense* b, + matrix::Dense* c) { #pragma omp parallel for for (size_type row = 0; row < c->get_size()[0]; ++row) { @@ -93,9 +93,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *a, const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Dense* a, const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* c) { if (beta->at(0, 0) != zero()) { #pragma omp parallel for @@ -129,9 +129,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void 
compute_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { #pragma omp parallel for for (size_type j = 0; j < x->get_size()[1]; ++j) { @@ -150,9 +150,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { #pragma omp parallel for for (size_type j = 0; j < x->get_size()[1]; ++j) { @@ -171,8 +171,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::Dense *x, - matrix::Dense> *result) + const matrix::Dense* x, + matrix::Dense>* result) { using norm_type = remove_complex; #pragma omp parallel for @@ -196,8 +196,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Coo *result) + const matrix::Dense* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -242,8 +242,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Csr *result) + const matrix::Dense* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -285,8 +285,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Ell *result) + const matrix::Dense* source, + matrix::Ell* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -318,8 +318,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void 
convert_to_hybrid(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Hybrid *result) + const matrix::Dense* source, + matrix::Hybrid* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -389,8 +389,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Sellp *result) + const matrix::Dense* source, + matrix::Sellp* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -464,8 +464,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::SparsityCsr *result) + const matrix::Dense* source, + matrix::SparsityCsr* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -506,7 +506,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Dense *source, size_type *result) + const matrix::Dense* source, size_type* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -527,8 +527,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result) + const matrix::Dense* source, + size_type* result) { const auto num_rows = source->get_size()[0]; const auto num_cols = source->get_size()[1]; @@ -550,8 +550,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense *source, - Array *result) + const matrix::Dense* source, + Array* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -572,8 +572,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result, 
size_type stride_factor, + const matrix::Dense* source, + size_type* result, size_type stride_factor, size_type slice_size) { auto num_rows = source->get_size()[0]; @@ -605,8 +605,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { #pragma omp parallel for for (size_type i = 0; i < orig->get_size()[0]; ++i) { @@ -621,8 +621,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { #pragma omp parallel for for (size_type i = 0; i < orig->get_size()[0]; ++i) { diff --git a/omp/matrix/diagonal_kernels.cpp b/omp/matrix/diagonal_kernels.cpp index 4f5c814e7fd..72b0ad21945 100644 --- a/omp/matrix/diagonal_kernels.cpp +++ b/omp/matrix/diagonal_kernels.cpp @@ -53,9 +53,9 @@ namespace diagonal { template void apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { const auto diag_values = a->get_const_values(); c->copy_from(b); diff --git a/omp/matrix/ell_kernels.cpp b/omp/matrix/ell_kernels.cpp index 96f4d5347d2..f2454decb32 100644 --- a/omp/matrix/ell_kernels.cpp +++ b/omp/matrix/ell_kernels.cpp @@ -61,9 +61,9 @@ namespace ell { template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c) { using a_accessor = gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; @@ -102,11 +102,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - 
matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Ell* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { using a_accessor = gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; @@ -146,8 +146,8 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Dense *result) + const matrix::Ell* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -171,8 +171,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Csr *result) + const matrix::Ell* source, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -180,8 +180,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Ell *source, - size_type *result) + const matrix::Ell* source, + size_type* result) { size_type nonzeros = 0; const auto num_rows = source->get_size()[0]; @@ -204,8 +204,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Ell *source, - Array *result) GKO_NOT_IMPLEMENTED; + const matrix::Ell* source, + Array* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_ELL_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -213,8 +213,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Ell *orig, - matrix::Diagonal *diag) + const matrix::Ell* orig, + matrix::Diagonal* diag) { const auto col_idxs = orig->get_const_col_idxs(); const auto values = orig->get_const_values(); diff --git a/omp/matrix/fbcsr_kernels.cpp b/omp/matrix/fbcsr_kernels.cpp index f2747e12e18..3396a18dc9e 100644 --- a/omp/matrix/fbcsr_kernels.cpp +++ 
b/omp/matrix/fbcsr_kernels.cpp @@ -60,20 +60,20 @@ namespace fbcsr { template void spmv(std::shared_ptr exec, - const matrix::Fbcsr *const a, - const matrix::Dense *const b, - matrix::Dense *const c) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* const a, + const matrix::Dense* const b, + matrix::Dense* const c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *const alpha, - const matrix::Fbcsr *const a, - const matrix::Dense *const b, - const matrix::Dense *const beta, - matrix::Dense *const c) GKO_NOT_IMPLEMENTED; + const matrix::Dense* const alpha, + const matrix::Fbcsr* const a, + const matrix::Dense* const b, + const matrix::Dense* const beta, + matrix::Dense* const c) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); @@ -81,8 +81,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Fbcsr *const source, - matrix::Dense *const result) + const matrix::Fbcsr* const source, + matrix::Dense* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -91,8 +91,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(const std::shared_ptr exec, - const matrix::Fbcsr *const source, - matrix::Csr *const result) + const matrix::Fbcsr* const source, + matrix::Csr* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -101,24 +101,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template inline void convert_fbcsr_to_csc( - size_type num_rows, const IndexType *const row_ptrs, - const IndexType *const col_idxs, const ValueType *const fbcsr_vals, - IndexType *const row_idxs, IndexType *const col_ptrs, - ValueType *const csc_vals, UnaryOperator op) GKO_NOT_IMPLEMENTED; + size_type num_rows, const IndexType* const row_ptrs, + const IndexType* const 
col_idxs, const ValueType* const fbcsr_vals, + IndexType* const row_idxs, IndexType* const col_ptrs, + ValueType* const csc_vals, UnaryOperator op) GKO_NOT_IMPLEMENTED; template void transpose_and_transform( std::shared_ptr exec, - matrix::Fbcsr *const trans, - const matrix::Fbcsr *const orig, + matrix::Fbcsr* const trans, + const matrix::Fbcsr* const orig, UnaryOperator op) GKO_NOT_IMPLEMENTED; template void transpose(std::shared_ptr exec, - const matrix::Fbcsr *const orig, - matrix::Fbcsr *const trans) + const matrix::Fbcsr* const orig, + matrix::Fbcsr* const trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -127,8 +127,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Fbcsr *const orig, - matrix::Fbcsr *const trans) + const matrix::Fbcsr* const orig, + matrix::Fbcsr* const trans) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -138,8 +138,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr exec, - const matrix::Fbcsr *const source, - size_type *const result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* const source, + size_type* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_MAX_NNZ_PER_ROW_KERNEL); @@ -148,8 +148,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr exec, - const matrix::Fbcsr *const source, - Array *const result) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* const source, + Array* const result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_CALCULATE_NONZEROS_PER_ROW_KERNEL); @@ -158,8 +158,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Fbcsr *const to_check, - bool *const is_sorted) GKO_NOT_IMPLEMENTED; + const matrix::Fbcsr* const 
to_check, + bool* const is_sorted) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); @@ -167,7 +167,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(const std::shared_ptr exec, - matrix::Fbcsr *const to_sort) + matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -176,8 +176,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Fbcsr *const orig, - matrix::Diagonal *const diag) + const matrix::Fbcsr* const orig, + matrix::Diagonal* const diag) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( diff --git a/omp/matrix/hybrid_kernels.cpp b/omp/matrix/hybrid_kernels.cpp index f97bee4a640..e8dbd8828e4 100644 --- a/omp/matrix/hybrid_kernels.cpp +++ b/omp/matrix/hybrid_kernels.cpp @@ -59,8 +59,8 @@ namespace hybrid { template void convert_to_dense(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Dense *result) + const matrix::Hybrid* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -99,8 +99,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Csr *result) + const matrix::Hybrid* source, + matrix::Csr* result) { auto csr_val = result->get_values(); auto csr_col_idxs = result->get_col_idxs(); @@ -161,8 +161,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Hybrid *source, - size_type *result) + const matrix::Hybrid* source, + size_type* result) { size_type ell_nnz = 0; size_type coo_nnz = 0; diff --git a/omp/matrix/sellp_kernels.cpp b/omp/matrix/sellp_kernels.cpp index 28ae80ff92a..ccb3b4877e6 100644 --- a/omp/matrix/sellp_kernels.cpp +++ b/omp/matrix/sellp_kernels.cpp @@ -52,8 +52,8 @@ namespace sellp { 
template void spmv(std::shared_ptr exec, - const matrix::Sellp *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Sellp* a, + const matrix::Dense* b, matrix::Dense* c) { auto col_idxs = a->get_const_col_idxs(); auto slice_lengths = a->get_const_slice_lengths(); @@ -85,11 +85,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Sellp *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Sellp* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { auto vals = a->get_const_values(); auto col_idxs = a->get_const_col_idxs(); @@ -125,8 +125,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Dense *result) + const matrix::Sellp* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -161,8 +161,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Csr *result) + const matrix::Sellp* source, + matrix::Csr* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( @@ -171,8 +171,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Sellp *source, - size_type *result) GKO_NOT_IMPLEMENTED; + const matrix::Sellp* source, + size_type* result) GKO_NOT_IMPLEMENTED; GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( GKO_DECLARE_SELLP_COUNT_NONZEROS_KERNEL); @@ -180,8 +180,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Sellp *orig, - matrix::Diagonal *diag) + const matrix::Sellp* orig, + matrix::Diagonal* diag) { const auto diag_size = diag->get_size()[0]; 
const auto slice_size = orig->get_slice_size(); diff --git a/omp/matrix/sparsity_csr_kernels.cpp b/omp/matrix/sparsity_csr_kernels.cpp index fee834f188e..86f32e75f49 100644 --- a/omp/matrix/sparsity_csr_kernels.cpp +++ b/omp/matrix/sparsity_csr_kernels.cpp @@ -63,8 +63,8 @@ namespace sparsity_csr { template void spmv(std::shared_ptr exec, - const matrix::SparsityCsr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::SparsityCsr* a, + const matrix::Dense* b, matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -91,11 +91,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::SparsityCsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -125,8 +125,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_num_diagonal_elements( std::shared_ptr exec, - const matrix::SparsityCsr *matrix, - size_type *num_diagonal_elements) + const matrix::SparsityCsr* matrix, + size_type* num_diagonal_elements) { auto num_rows = matrix->get_size()[0]; auto row_ptrs = matrix->get_const_row_ptrs(); @@ -148,9 +148,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void remove_diagonal_elements(std::shared_ptr exec, - const IndexType *row_ptrs, - const IndexType *col_idxs, - matrix::SparsityCsr *matrix) + const IndexType* row_ptrs, + const IndexType* col_idxs, + matrix::SparsityCsr* matrix) { auto num_rows = matrix->get_size()[0]; auto adj_ptrs = matrix->get_row_ptrs(); @@ -182,9 +182,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template inline void convert_sparsity_to_csc(size_type num_rows, - const IndexType *row_ptrs, - const IndexType *col_idxs, - 
IndexType *row_idxs, IndexType *col_ptrs) + const IndexType* row_ptrs, + const IndexType* col_idxs, + IndexType* row_idxs, IndexType* col_ptrs) { for (size_type row = 0; row < num_rows; ++row) { for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) { @@ -198,8 +198,8 @@ inline void convert_sparsity_to_csc(size_type num_rows, template void transpose_and_transform( std::shared_ptr exec, - matrix::SparsityCsr *trans, - const matrix::SparsityCsr *orig) + matrix::SparsityCsr* trans, + const matrix::SparsityCsr* orig) { auto trans_row_ptrs = trans->get_row_ptrs(); auto orig_row_ptrs = orig->get_const_row_ptrs(); @@ -221,8 +221,8 @@ void transpose_and_transform( template void transpose(std::shared_ptr exec, - const matrix::SparsityCsr *orig, - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, + matrix::SparsityCsr* trans) { transpose_and_transform(exec, trans, orig); } @@ -233,7 +233,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::SparsityCsr *to_sort) + matrix::SparsityCsr* to_sort) { auto row_ptrs = to_sort->get_row_ptrs(); auto col_idxs = to_sort->get_col_idxs(); @@ -253,7 +253,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr *to_check, bool *is_sorted) + const matrix::SparsityCsr* to_check, bool* is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); diff --git a/omp/multigrid/amgx_pgm_kernels.cpp b/omp/multigrid/amgx_pgm_kernels.cpp index 94c7f9b983d..1699b73f436 100644 --- a/omp/multigrid/amgx_pgm_kernels.cpp +++ b/omp/multigrid/amgx_pgm_kernels.cpp @@ -68,8 +68,8 @@ namespace amgx_pgm { template void match_edge(std::shared_ptr exec, - const Array &strongest_neighbor, - Array &agg) + const Array& strongest_neighbor, + Array& agg) { auto agg_vals = agg.get_data(); auto strongest_neighbor_vals = 
strongest_neighbor.get_const_data(); @@ -92,7 +92,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, IndexType *num_unagg) + const Array& agg, IndexType* num_unagg) { IndexType unagg = 0; #pragma omp parallel for reduction(+ : unagg) @@ -106,8 +106,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template -void renumber(std::shared_ptr exec, Array &agg, - IndexType *num_agg) +void renumber(std::shared_ptr exec, Array& agg, + IndexType* num_agg) { const auto num = agg.get_num_elems(); Array agg_map(exec, num + 1); @@ -133,9 +133,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); template void find_strongest_neighbor( std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, Array& agg, + Array& strongest_neighbor) { const auto row_ptrs = weight_mtx->get_const_row_ptrs(); const auto col_idxs = weight_mtx->get_const_col_idxs(); @@ -188,10 +188,10 @@ GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void assign_to_exist_agg(std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, - Array &agg, - Array &intermediate_agg) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, + Array& agg, + Array& intermediate_agg) { const auto row_ptrs = weight_mtx->get_const_row_ptrs(); const auto col_idxs = weight_mtx->get_const_col_idxs(); diff --git a/omp/preconditioner/isai_kernels.cpp b/omp/preconditioner/isai_kernels.cpp index b7866770b1e..82af989a3db 100644 --- a/omp/preconditioner/isai_kernels.cpp +++ b/omp/preconditioner/isai_kernels.cpp @@ -62,8 +62,8 @@ namespace isai { template -void forall_matching(const IndexType *fst, IndexType fst_size, - const IndexType *snd, IndexType snd_size, Callback cb) +void forall_matching(const 
IndexType* fst, IndexType fst_size, + const IndexType* snd, IndexType snd_size, Callback cb) { IndexType fst_idx{}; IndexType snd_idx{}; @@ -82,9 +82,9 @@ void forall_matching(const IndexType *fst, IndexType fst_size, template void generic_generate(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, Callable direct_solve, bool tri) { /* @@ -205,14 +205,14 @@ void generic_generate(std::shared_ptr exec, template void generate_tri_inverse(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, bool lower) { auto trs_solve = [lower](const range> trisystem, - ValueType *rhs, const IndexType) { + ValueType* rhs, const IndexType) { const IndexType size = trisystem.length(0); if (size <= 0) { return; @@ -255,7 +255,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -inline IndexType choose_pivot(IndexType block_size, const ValueType *block, +inline IndexType choose_pivot(IndexType block_size, const ValueType* block, size_type stride) { IndexType cp = 0; @@ -270,7 +270,7 @@ inline IndexType choose_pivot(IndexType block_size, const ValueType *block, template inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, - ValueType *block, size_type stride) + ValueType* block, size_type stride) { using std::swap; for (IndexType i = 0; i < block_size; ++i) { @@ -281,15 +281,15 @@ inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, template void generate_general_inverse(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, - IndexType *excess_nz_ptrs, bool spd) + const matrix::Csr* mtx, + matrix::Csr* 
inverse_mtx, + IndexType* excess_rhs_ptrs, + IndexType* excess_nz_ptrs, bool spd) { using std::swap; auto general_solve = [spd](const range> transposed_system_range, - ValueType *rhs, const IndexType rhs_one_idx) { + ValueType* rhs, const IndexType rhs_one_idx) { const IndexType size = transposed_system_range.length(0); if (size <= 0) { return; @@ -350,12 +350,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_excess_system(std::shared_ptr, - const matrix::Csr *input, - const matrix::Csr *inverse, - const IndexType *excess_rhs_ptrs, - const IndexType *excess_nz_ptrs, - matrix::Csr *excess_system, - matrix::Dense *excess_rhs, + const matrix::Csr* input, + const matrix::Csr* inverse, + const IndexType* excess_rhs_ptrs, + const IndexType* excess_nz_ptrs, + matrix::Csr* excess_system, + matrix::Dense* excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -414,8 +414,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - matrix::Dense *excess_solution, + const IndexType* excess_block_ptrs, + matrix::Dense* excess_solution, size_type e_start, size_type e_end) { auto excess_values = excess_solution->get_values(); @@ -441,9 +441,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scatter_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - const matrix::Dense *excess_solution, - matrix::Csr *inverse, + const IndexType* excess_block_ptrs, + const matrix::Dense* excess_solution, + matrix::Csr* inverse, size_type e_start, size_type e_end) { auto excess_values = excess_solution->get_const_values(); diff --git a/omp/preconditioner/jacobi_kernels.cpp b/omp/preconditioner/jacobi_kernels.cpp index 1b0c0c6c1a5..f1240ed8173 100644 --- a/omp/preconditioner/jacobi_kernels.cpp +++ b/omp/preconditioner/jacobi_kernels.cpp @@ -67,8 +67,8 @@ namespace jacobi { void 
initialize_precisions(std::shared_ptr exec, - const Array &source, - Array &precisions) + const Array& source, + Array& precisions) { const auto source_size = source.get_num_elems(); for (auto i = 0u; i < precisions.get_num_elems(); ++i) { @@ -81,9 +81,9 @@ namespace { template -inline bool has_same_nonzero_pattern(const IndexType *prev_row_ptr, - const IndexType *curr_row_ptr, - const IndexType *next_row_ptr) +inline bool has_same_nonzero_pattern(const IndexType* prev_row_ptr, + const IndexType* curr_row_ptr, + const IndexType* next_row_ptr) { return std::distance(curr_row_ptr, next_row_ptr) == std::distance(prev_row_ptr, curr_row_ptr) && @@ -92,8 +92,8 @@ inline bool has_same_nonzero_pattern(const IndexType *prev_row_ptr, template -size_type find_natural_blocks(const matrix::Csr *mtx, - uint32 max_block_size, IndexType *block_ptrs) +size_type find_natural_blocks(const matrix::Csr* mtx, + uint32 max_block_size, IndexType* block_ptrs) { const auto rows = mtx->get_size()[0]; const auto row_ptrs = mtx->get_const_row_ptrs(); @@ -127,7 +127,7 @@ size_type find_natural_blocks(const matrix::Csr *mtx, template inline size_type agglomerate_supervariables(uint32 max_block_size, size_type num_natural_blocks, - IndexType *block_ptrs) + IndexType* block_ptrs) { if (num_natural_blocks == 0) { return 0; @@ -154,9 +154,9 @@ inline size_type agglomerate_supervariables(uint32 max_block_size, template void find_blocks(std::shared_ptr exec, - const matrix::Csr *system_matrix, - uint32 max_block_size, size_type &num_blocks, - Array &block_pointers) + const matrix::Csr* system_matrix, + uint32 max_block_size, size_type& num_blocks, + Array& block_pointers) { num_blocks = find_natural_blocks(system_matrix, max_block_size, block_pointers.get_data()); @@ -172,9 +172,9 @@ namespace { template -inline void extract_block(const matrix::Csr *mtx, +inline void extract_block(const matrix::Csr* mtx, IndexType block_size, IndexType block_start, - ValueType *block, size_type stride) + ValueType* 
block, size_type stride) { for (int i = 0; i < block_size; ++i) { for (int j = 0; j < block_size; ++j) { @@ -198,7 +198,7 @@ inline void extract_block(const matrix::Csr *mtx, template -inline IndexType choose_pivot(IndexType block_size, const ValueType *block, +inline IndexType choose_pivot(IndexType block_size, const ValueType* block, size_type stride) { IndexType cp = 0; @@ -213,7 +213,7 @@ inline IndexType choose_pivot(IndexType block_size, const ValueType *block, template inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, - ValueType *block, size_type stride) + ValueType* block, size_type stride) { using std::swap; for (IndexType i = 0; i < block_size; ++i) { @@ -224,7 +224,7 @@ inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, template inline bool apply_gauss_jordan_transform(IndexType row, IndexType col, - IndexType block_size, ValueType *block, + IndexType block_size, ValueType* block, size_type stride) { const auto d = block[row * stride + col]; @@ -253,8 +253,8 @@ template > -inline void transpose_block(IndexType block_size, const SourceValueType *from, - size_type from_stride, ResultValueType *to, +inline void transpose_block(IndexType block_size, const SourceValueType* from, + size_type from_stride, ResultValueType* to, size_type to_stride, ValueConverter converter = {}) noexcept { @@ -271,8 +271,8 @@ template > inline void conj_transpose_block(IndexType block_size, - const SourceValueType *from, - size_type from_stride, ResultValueType *to, + const SourceValueType* from, + size_type from_stride, ResultValueType* to, size_type to_stride, ValueConverter converter = {}) noexcept { @@ -289,10 +289,10 @@ template > inline void permute_and_transpose_block(IndexType block_size, - const IndexType *col_perm, - const SourceValueType *source, + const IndexType* col_perm, + const SourceValueType* source, size_type source_stride, - ResultValueType *result, + ResultValueType* result, size_type result_stride, 
ValueConverter converter = {}) { @@ -306,8 +306,8 @@ inline void permute_and_transpose_block(IndexType block_size, template -inline bool invert_block(IndexType block_size, IndexType *perm, - ValueType *block, size_type stride) +inline bool invert_block(IndexType block_size, IndexType* perm, + ValueType* block, size_type stride) { using std::swap; for (IndexType k = 0; k < block_size; ++k) { @@ -327,10 +327,10 @@ inline bool invert_block(IndexType block_size, IndexType *perm, template inline bool validate_precision_reduction_feasibility(IndexType block_size, - const ValueType *block, + const ValueType* block, size_type stride, - ValueType *tmp_buffer, - IndexType *tmp_perm) + ValueType* tmp_buffer, + IndexType* tmp_perm) { using gko::detail::float_traits; std::iota(tmp_perm, tmp_perm + block_size, IndexType{0}); @@ -357,14 +357,14 @@ inline bool validate_precision_reduction_feasibility(IndexType block_size, template void generate(std::shared_ptr exec, - const matrix::Csr *system_matrix, + const matrix::Csr* system_matrix, size_type num_blocks, uint32 max_block_size, remove_complex accuracy, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array> &conditioning, - Array &block_precisions, - const Array &block_pointers, Array &blocks) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array>& conditioning, + Array& block_precisions, + const Array& block_pointers, Array& blocks) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_data(); @@ -457,7 +457,7 @@ void generate(std::shared_ptr exec, ValueType, p, permute_and_transpose_block( block_size, perm, block, block_size, - reinterpret_cast( + reinterpret_cast( blocks.get_data() + storage_scheme.get_group_offset(g + b)) + storage_scheme.get_block_offset(g + b), @@ -477,9 +477,9 @@ template < typename ValueType, typename BlockValueType, typename ValueConverter = default_converter> inline void apply_block(size_type block_size, 
size_type num_rhs, - const BlockValueType *block, size_type stride, - ValueType alpha, const ValueType *b, size_type stride_b, - ValueType beta, ValueType *x, size_type stride_x, + const BlockValueType* block, size_type stride, + ValueType alpha, const ValueType* b, size_type stride_b, + ValueType beta, ValueType* x, size_type stride_x, ValueConverter converter = {}) { if (beta != zero()) { @@ -514,14 +514,14 @@ inline void apply_block(size_type block_size, size_type num_rhs, template void apply(std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, - const Array &blocks, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, + const Array& blocks, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -536,7 +536,7 @@ void apply(std::shared_ptr exec, size_type num_blocks, GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, apply_block(block_size, b->get_size()[1], - reinterpret_cast(group) + + reinterpret_cast(group) + storage_scheme.get_block_offset(i), storage_scheme.get_stride(), alpha->at(0, 0), block_b, b->get_stride(), beta->at(0, 0), block_x, @@ -551,11 +551,11 @@ template void simple_apply( std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const matrix::Dense *b, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& 
block_precisions, + const Array& block_pointers, const Array& blocks, + const matrix::Dense* b, matrix::Dense* x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -570,7 +570,7 @@ void simple_apply( GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, apply_block(block_size, b->get_size()[1], - reinterpret_cast(group) + + reinterpret_cast(group) + storage_scheme.get_block_offset(i), storage_scheme.get_stride(), one(), block_b, b->get_stride(), zero(), block_x, @@ -585,11 +585,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -608,9 +608,9 @@ void transpose_jacobi( ValueType, p, transpose_block( block_size, - reinterpret_cast(group) + block_ofs, + reinterpret_cast(group) + block_ofs, block_stride, - reinterpret_cast(out_group) + block_ofs, + reinterpret_cast(out_group) + block_ofs, block_stride)); } } @@ -622,11 +622,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + 
storage_scheme, + Array& out_blocks) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -645,9 +645,9 @@ void conj_transpose_jacobi( ValueType, p, conj_transpose_block( block_size, - reinterpret_cast(group) + block_ofs, + reinterpret_cast(group) + block_ofs, block_stride, - reinterpret_cast(out_group) + block_ofs, + reinterpret_cast(out_group) + block_ofs, block_stride)); } } @@ -659,11 +659,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *result_values, size_type result_stride) + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* result_values, size_type result_stride) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -682,13 +682,12 @@ void convert_to_dense( const auto p = prec ? 
prec[i] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, - transpose_block( - block_size, - reinterpret_cast(group) + - storage_scheme.get_block_offset(i), - storage_scheme.get_stride(), - result_values + ptrs[i] * result_stride + ptrs[i], - result_stride)); + transpose_block(block_size, + reinterpret_cast(group) + + storage_scheme.get_block_offset(i), + storage_scheme.get_stride(), + result_values + ptrs[i] * result_stride + ptrs[i], + result_stride)); } } diff --git a/omp/reorder/rcm_kernels.cpp b/omp/reorder/rcm_kernels.cpp index 9b3a454f334..d0fb0e00d7a 100644 --- a/omp/reorder/rcm_kernels.cpp +++ b/omp/reorder/rcm_kernels.cpp @@ -85,8 +85,8 @@ namespace rcm { template void get_degree_of_nodes(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - IndexType *const degrees) + const IndexType* const row_ptrs, + IndexType* const degrees) { #pragma omp parallel for for (IndexType i = 0; i < num_vertices; ++i) { @@ -123,14 +123,14 @@ struct UbfsLinearQueue { write_mutex() {} - UbfsLinearQueue(UbfsLinearQueue &other) = delete; - UbfsLinearQueue &operator=(const UbfsLinearQueue &other) = delete; + UbfsLinearQueue(UbfsLinearQueue& other) = delete; + UbfsLinearQueue& operator=(const UbfsLinearQueue& other) = delete; /** * Copies a chunk of nodes back into the work queue, * in a thread-safe manner. */ - void enqueue_chunk(const IndexType *const chunk, size_type n) + void enqueue_chunk(const IndexType* const chunk, size_type n) { const auto data = &arr[0]; @@ -159,7 +159,7 @@ struct UbfsLinearQueue { * finally returned, after all threads stopped working and still no nodes * are available, the algorithm is definitely done. 
*/ - std::pair dequeue_chunk(int *threads_working) + std::pair dequeue_chunk(int* threads_working) { const auto data = &arr[0]; std::lock_guard read_guard{read_mutex}; @@ -210,26 +210,26 @@ struct UbfsLinearQueue { #ifdef _MSC_VER -#define GKO_CMPXCHG_IMPL(ptr, ptr_expected, replace_with) \ - if (sizeof replace_with == 8) { \ - return _InterlockedCompareExchange64(reinterpret_cast(ptr), \ - replace_with, \ - *ptr_expected) == *ptr_expected; \ - } \ - if (sizeof replace_with == 4) { \ - return _InterlockedCompareExchange(reinterpret_cast(ptr), \ - replace_with, \ - *ptr_expected) == *ptr_expected; \ - } \ - if (sizeof replace_with == 2) { \ - return _InterlockedCompareExchange16(reinterpret_cast(ptr), \ - replace_with, \ - *ptr_expected) == *ptr_expected; \ - } \ - if (sizeof replace_with == 1) { \ - return _InterlockedCompareExchange8(reinterpret_cast(ptr), \ - replace_with, \ - *ptr_expected) == *ptr_expected; \ +#define GKO_CMPXCHG_IMPL(ptr, ptr_expected, replace_with) \ + if (sizeof replace_with == 8) { \ + return _InterlockedCompareExchange64(reinterpret_cast(ptr), \ + replace_with, \ + *ptr_expected) == *ptr_expected; \ + } \ + if (sizeof replace_with == 4) { \ + return _InterlockedCompareExchange(reinterpret_cast(ptr), \ + replace_with, \ + *ptr_expected) == *ptr_expected; \ + } \ + if (sizeof replace_with == 2) { \ + return _InterlockedCompareExchange16(reinterpret_cast(ptr), \ + replace_with, \ + *ptr_expected) == *ptr_expected; \ + } \ + if (sizeof replace_with == 1) { \ + return _InterlockedCompareExchange8(reinterpret_cast(ptr), \ + replace_with, \ + *ptr_expected) == *ptr_expected; \ } #else #define GKO_CMPXCHG_IMPL(ptr, ptr_expected, replace_with) \ @@ -246,7 +246,7 @@ struct UbfsLinearQueue { * Usage with non-primitive types is explicitly discouraged. 
*/ template -inline bool compare_exchange_weak_acqrel(TargetType *value, TargetType old, +inline bool compare_exchange_weak_acqrel(TargetType* value, TargetType old, TargetType newer) { GKO_CMPXCHG_IMPL(value, &old, newer) @@ -255,11 +255,11 @@ inline bool compare_exchange_weak_acqrel(TargetType *value, TargetType old, template inline void reduce_neighbours_levels(const IndexType num_vertices, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - IndexType *const local_to_insert, - size_type *const local_to_insert_size, - IndexType *const levels, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + IndexType* const local_to_insert, + size_type* const local_to_insert_size, + IndexType* const levels, const IndexType node) { IndexType level; @@ -314,8 +314,8 @@ inline void reduce_neighbours_levels(const IndexType num_vertices, */ template void ubfs(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, const IndexType *const col_idxs, - IndexType *const + const IndexType* const row_ptrs, const IndexType* const col_idxs, + IndexType* const levels, // Must be inf/max in all nodes connected to source const IndexType start, const IndexType max_degree) { @@ -385,9 +385,9 @@ void ubfs(std::shared_ptr exec, const IndexType num_vertices, template std::pair rls_contender_and_height( std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, const IndexType *const col_idxs, - const IndexType *const degrees, - IndexType *const levels, // Must be max/inf in all nodes connected to start + const IndexType* const row_ptrs, const IndexType* const col_idxs, + const IndexType* const degrees, + IndexType* const levels, // Must be max/inf in all nodes connected to start const IndexType start, const IndexType max_degree) { // Layout: ((level, degree), idx). 
@@ -443,9 +443,9 @@ std::pair rls_contender_and_height( template std::pair find_min_idx_and_max_val( std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, const IndexType *const col_idxs, - const IndexType *const degrees, vector &levels, - const uint8 *const previous_component, + const IndexType* const row_ptrs, const IndexType* const col_idxs, + const IndexType* const degrees, vector& levels, + const uint8* const previous_component, gko::reorder::starting_strategy strategy) { // Layout: ((min_val, min_idx), (max_val, max_idx)). @@ -507,11 +507,11 @@ std::pair find_min_idx_and_max_val( template IndexType find_start_node(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - const IndexType *const degrees, - vector &levels, - const uint8 *const previous_component, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const degrees, + vector& levels, + const uint8* const previous_component, const gko::reorder::starting_strategy strategy) { // Find the node with minimal degree and the maximum degree. 
@@ -570,8 +570,8 @@ IndexType find_start_node(std::shared_ptr exec, */ template vector count_levels(std::shared_ptr exec, - const IndexType *const levels, - uint8 *const previous_component, + const IndexType* const levels, + uint8* const previous_component, IndexType num_vertices) { const int32 num_threads = omp_get_max_threads(); @@ -622,9 +622,9 @@ vector count_levels(std::shared_ptr exec, */ template vector compute_level_offsets(std::shared_ptr exec, - const IndexType *const levels, + const IndexType* const levels, IndexType num_vertices, - uint8 *const previous_component) + uint8* const previous_component) { auto counts = count_levels(exec, levels, previous_component, num_vertices); counts.push_back(0); @@ -650,10 +650,10 @@ constexpr int32 level_processed = -1; */ template void write_permutation(std::shared_ptr exec, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, IndexType *const levels, - const IndexType *const degrees, - const vector &offsets, IndexType *const perm, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, IndexType* const levels, + const IndexType* const degrees, + const vector& offsets, IndexType* const perm, const IndexType num_vertices, const IndexType base_offset, const IndexType start) { @@ -743,11 +743,11 @@ void write_permutation(std::shared_ptr exec, */ template IndexType handle_isolated_nodes(std::shared_ptr exec, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - const IndexType *const degrees, - IndexType *const perm, IndexType num_vertices, - vector &previous_component) + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const degrees, + IndexType* const perm, IndexType num_vertices, + vector& previous_component) { const int32 num_threads = omp_get_max_threads(); vector> local_isolated_nodes( @@ -787,10 +787,10 @@ IndexType handle_isolated_nodes(std::shared_ptr exec, template void get_permutation(std::shared_ptr exec, const IndexType 
num_vertices, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - const IndexType *const degrees, IndexType *const perm, - IndexType *const inv_perm, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const degrees, IndexType* const perm, + IndexType* const inv_perm, const gko::reorder::starting_strategy strategy) { // Initialize the perm to all "signal value". diff --git a/omp/solver/cb_gmres_kernels.cpp b/omp/solver/cb_gmres_kernels.cpp index 09c0c2265ba..2f41553d58e 100644 --- a/omp/solver/cb_gmres_kernels.cpp +++ b/omp/solver/cb_gmres_kernels.cpp @@ -60,12 +60,12 @@ namespace { template -void finish_arnoldi_CGS(matrix::Dense *next_krylov_basis, +void finish_arnoldi_CGS(matrix::Dense* next_krylov_basis, Accessor3d krylov_bases, - matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, const stopping_status *stop_status) + matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, const stopping_status* stop_status) { using rc_vtype = remove_complex; constexpr bool has_scalar = @@ -191,9 +191,9 @@ void finish_arnoldi_CGS(matrix::Dense *next_krylov_basis, template -void calculate_sin_and_cos(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, +void calculate_sin_and_cos(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, const size_type rhs) { if (hessenberg_iter->at(iter, rhs) == zero()) { @@ -213,10 +213,10 @@ void calculate_sin_and_cos(matrix::Dense *givens_sin, template -void givens_rotation(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) +void givens_rotation(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { #pragma 
omp parallel for for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { @@ -252,10 +252,10 @@ void givens_rotation(matrix::Dense *givens_sin, template void calculate_next_residual_norm( - matrix::Dense *givens_sin, matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, size_type iter, - const stopping_status *stop_status) + matrix::Dense* givens_sin, matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, size_type iter, + const stopping_status* stop_status) { #pragma omp parallel for for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { @@ -275,9 +275,9 @@ void calculate_next_residual_norm( template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const size_type *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const size_type* final_iter_nums) { #pragma omp parallel for for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { @@ -299,9 +299,9 @@ void solve_upper_triangular( template void calculate_qy(ConstAccessor3d krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const size_type *final_iter_nums) + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const size_type* final_iter_nums) { #pragma omp parallel for for (size_type i = 0; i < before_preconditioner->get_size()[0]; ++i) { @@ -321,11 +321,11 @@ void calculate_qy(ConstAccessor3d krylov_bases, template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { using rc_vtype = remove_complex; 
@@ -349,13 +349,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense> *arnoldi_norm, + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense>* arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense *next_krylov_basis, - Array *final_iter_nums, size_type krylov_dim) + matrix::Dense* next_krylov_basis, + Array* final_iter_nums, size_type krylov_dim) { using rc_vtype = remove_complex; constexpr bool has_scalar = @@ -424,17 +424,17 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE( template void step_1(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, Array *final_iter_nums, - const Array *stop_status, Array *, - Array *) + matrix::Dense* next_krylov_basis, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, Array* final_iter_nums, + const Array* stop_status, Array*, + Array*) { #pragma omp parallel for for (size_type i = 0; i < final_iter_nums->get_num_elems(); ++i) { @@ -457,12 +457,12 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, + const matrix::Dense* residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense 
*before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, final_iter_nums->get_const_data()); diff --git a/omp/solver/gmres_kernels.cpp b/omp/solver/gmres_kernels.cpp index 80bfbd6f54e..fc78e0ae6ee 100644 --- a/omp/solver/gmres_kernels.cpp +++ b/omp/solver/gmres_kernels.cpp @@ -58,9 +58,9 @@ namespace { template -void finish_arnoldi(size_type num_rows, matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) +void finish_arnoldi(size_type num_rows, matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { const auto krylov_bases_rowoffset = num_rows; const auto next_krylov_rowoffset = (iter + 1) * krylov_bases_rowoffset; @@ -113,9 +113,9 @@ void finish_arnoldi(size_type num_rows, matrix::Dense *krylov_bases, template -void calculate_sin_and_cos(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, +void calculate_sin_and_cos(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, const size_type rhs) { if (hessenberg_iter->at(iter, rhs) == zero()) { @@ -135,10 +135,10 @@ void calculate_sin_and_cos(matrix::Dense *givens_sin, template -void givens_rotation(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) +void givens_rotation(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { #pragma omp parallel for for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { @@ -174,10 +174,10 @@ void givens_rotation(matrix::Dense *givens_sin, template void calculate_next_residual_norm( - 
matrix::Dense *givens_sin, matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, size_type iter, - const stopping_status *stop_status) + matrix::Dense* givens_sin, matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, size_type iter, + const stopping_status* stop_status) { #pragma omp parallel for for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { @@ -197,9 +197,9 @@ void calculate_next_residual_norm( template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const size_type *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const size_type* final_iter_nums) { #pragma omp parallel for for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { @@ -220,10 +220,10 @@ void solve_upper_triangular( template -void calculate_qy(const matrix::Dense *krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const size_type *final_iter_nums) +void calculate_qy(const matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const size_type* final_iter_nums) { const auto krylov_bases_rowoffset = before_preconditioner->get_size()[0]; #pragma omp parallel for @@ -245,11 +245,11 @@ void calculate_qy(const matrix::Dense *krylov_bases, template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { using norm_type = remove_complex; for (size_type j = 0; j < b->get_size()[1]; ++j) { @@ -272,11 +272,11 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - Array *final_iter_nums, size_type krylov_dim) + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + Array* final_iter_nums, size_type krylov_dim) { using norm_type = remove_complex; for (size_type j = 0; j < residual->get_size()[1]; ++j) { @@ -304,14 +304,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL); template void step_1(std::shared_ptr exec, size_type num_rows, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - Array *final_iter_nums, - const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + Array* final_iter_nums, + const Array* stop_status) { #pragma omp parallel for for (size_type i = 0; i < final_iter_nums->get_num_elems(); ++i) { @@ -333,12 +333,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* krylov_bases, + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(residual_norm_collection, 
hessenberg, y, final_iter_nums->get_const_data()); diff --git a/omp/solver/idr_kernels.cpp b/omp/solver/idr_kernels.cpp index 44dc5d870be..ebe1bee9090 100644 --- a/omp/solver/idr_kernels.cpp +++ b/omp/solver/idr_kernels.cpp @@ -62,10 +62,10 @@ namespace { template void solve_lower_triangular(const size_type nrhs, - const matrix::Dense *m, - const matrix::Dense *f, - matrix::Dense *c, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* f, + matrix::Dense* c, + const Array* stop_status) { #pragma omp parallel for for (size_type i = 0; i < f->get_size()[1]; i++) { @@ -86,11 +86,11 @@ void solve_lower_triangular(const size_type nrhs, template void update_g_and_u(const size_type nrhs, const size_type k, - const matrix::Dense *p, - const matrix::Dense *m, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, - const Array *stop_status) + const matrix::Dense* p, + const matrix::Dense* m, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, + const Array* stop_status) { #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { @@ -120,7 +120,7 @@ void update_g_and_u(const size_type nrhs, const size_type k, template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(Distribution&& dist, Generator&& gen) { return dist(gen); } @@ -128,7 +128,7 @@ get_rand_value(Distribution &&dist, Generator &&gen) template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(Distribution&& dist, Generator&& gen) { return ValueType(dist(gen), dist(gen)); } @@ -139,9 +139,9 @@ get_rand_value(Distribution &&dist, Generator &&gen) template void initialize(std::shared_ptr exec, const size_type nrhs, - matrix::Dense *m, - matrix::Dense *subspace_vectors, bool deterministic, - Array *stop_status) + matrix::Dense* m, + matrix::Dense* subspace_vectors, bool deterministic, + Array* stop_status) { #pragma omp declare 
reduction(add:ValueType : omp_out = omp_out + omp_in) @@ -205,12 +205,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *m, - const matrix::Dense *f, - const matrix::Dense *residual, - const matrix::Dense *g, matrix::Dense *c, - matrix::Dense *v, - const Array *stop_status) + const size_type k, const matrix::Dense* m, + const matrix::Dense* f, + const matrix::Dense* residual, + const matrix::Dense* g, matrix::Dense* c, + matrix::Dense* v, + const Array* stop_status) { const auto m_size = m->get_size(); @@ -239,10 +239,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *omega, - const matrix::Dense *preconditioned_vector, - const matrix::Dense *c, matrix::Dense *u, - const Array *stop_status) + const size_type k, const matrix::Dense* omega, + const matrix::Dense* preconditioned_vector, + const matrix::Dense* c, matrix::Dense* u, + const Array* stop_status) { for (size_type i = 0; i < nrhs; i++) { if (stop_status->get_const_data()[i].has_stopped()) { @@ -265,12 +265,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, matrix::Dense *m, - matrix::Dense *f, matrix::Dense *, - matrix::Dense *residual, matrix::Dense *x, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, matrix::Dense* m, + matrix::Dense* f, matrix::Dense*, + matrix::Dense* residual, matrix::Dense* x, + const Array* stop_status) { update_g_and_u(nrhs, k, p, m, g, g_k, u, stop_status); @@ -312,9 +312,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template 
void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense *tht, - const matrix::Dense> *residual_norm, - matrix::Dense *omega, const Array *stop_status) + const remove_complex kappa, const matrix::Dense* tht, + const matrix::Dense>* residual_norm, + matrix::Dense* omega, const Array* stop_status) { #pragma omp parallel for for (size_type i = 0; i < nrhs; i++) { diff --git a/omp/solver/lower_trs_kernels.cpp b/omp/solver/lower_trs_kernels.cpp index b313f762d2f..edc81943ee5 100644 --- a/omp/solver/lower_trs_kernels.cpp +++ b/omp/solver/lower_trs_kernels.cpp @@ -60,14 +60,14 @@ namespace lower_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { do_transpose = false; } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { // This init kernel is here to allow initialization of the solve struct for // a more sophisticated implementation as for other executors. @@ -76,8 +76,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { // This generate kernel is here to allow for a more sophisticated // implementation as for other executors. 
This kernel would perform the @@ -94,10 +94,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); diff --git a/omp/solver/upper_trs_kernels.cpp b/omp/solver/upper_trs_kernels.cpp index 29ecfda6c8c..0952c339990 100644 --- a/omp/solver/upper_trs_kernels.cpp +++ b/omp/solver/upper_trs_kernels.cpp @@ -60,14 +60,14 @@ namespace upper_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { do_transpose = false; } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { // This init kernel is here to allow initialization of the solve struct for // a more sophisticated implementation as for other executors. @@ -76,8 +76,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { // This generate kernel is here to allow for a more sophisticated // implementation as for other executors. 
This kernel would perform the @@ -94,10 +94,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); diff --git a/omp/stop/criterion_kernels.cpp b/omp/stop/criterion_kernels.cpp index d48b5cc0165..fc9e9cd8040 100644 --- a/omp/stop/criterion_kernels.cpp +++ b/omp/stop/criterion_kernels.cpp @@ -48,7 +48,7 @@ namespace set_all_statuses { void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, - bool setFinalized, Array *stop_status) + bool setFinalized, Array* stop_status) { #pragma omp parallel for for (int i = 0; i < stop_status->get_num_elems(); i++) { diff --git a/omp/stop/residual_norm_kernels.cpp b/omp/stop/residual_norm_kernels.cpp index a62dccbae48..4973b8c61c1 100644 --- a/omp/stop/residual_norm_kernels.cpp +++ b/omp/stop/residual_norm_kernels.cpp @@ -53,12 +53,12 @@ namespace residual_norm { template void residual_norm(std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense *orig_tau, + const matrix::Dense* tau, + const matrix::Dense* orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, - bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, + bool* one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); @@ -101,11 +101,11 @@ namespace implicit_residual_norm { template void implicit_residual_norm( std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense> *orig_tau, + 
const matrix::Dense* tau, + const matrix::Dense>* orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, bool* one_changed) { bool local_one_changed = false; #pragma omp parallel for reduction(|| : local_one_changed) diff --git a/omp/test/base/kernel_launch.cpp b/omp/test/base/kernel_launch.cpp index cb9ae347dea..2c4712cfa52 100644 --- a/omp/test/base/kernel_launch.cpp +++ b/omp/test/base/kernel_launch.cpp @@ -97,7 +97,7 @@ TEST_F(KernelLaunch, Runs1D) exec, [] GKO_KERNEL(auto i, auto d) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i] = i; }, zero_array.get_num_elems(), zero_array.get_data()); @@ -112,8 +112,8 @@ TEST_F(KernelLaunch, Runs1DArray) exec, [] GKO_KERNEL(auto i, auto d, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i] = i; } else { @@ -132,10 +132,10 @@ TEST_F(KernelLaunch, Runs1DDense) exec, [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr; bool strides_correct = d.stride == 5 && d2.stride == 5; @@ -151,7 +151,7 @@ TEST_F(KernelLaunch, Runs1DDense) } }, 16, zero_dense2.get(), - static_cast *>(zero_dense2.get()), + static_cast*>(zero_dense2.get()), zero_dense2->get_const_values()); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); @@ -165,7 +165,7 @@ 
TEST_F(KernelLaunch, Runs2D) [] GKO_KERNEL(auto i, auto j, auto d) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); d[i + 4 * j] = 4 * i + j; }, dim<2>{4, 4}, zero_array.get_data()); @@ -181,8 +181,8 @@ TEST_F(KernelLaunch, Runs2DArray) [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr) { static_assert(is_same::value, "index"); static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); if (d == d_ptr) { d[i + 4 * j] = 4 * i + j; } else { @@ -202,15 +202,15 @@ TEST_F(KernelLaunch, Runs2DDense) [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, auto d4, auto d2_ptr, auto d3_ptr) { static_assert(is_same::value, "index"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); - static_assert(is_same::value, + static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); - static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); + static_assert(is_same::value, "type"); bool pointers_correct = d.data == d_ptr && d2.data == d_ptr && d3.data == d2_ptr && d4 == d3_ptr; bool strides_correct = @@ -229,7 +229,7 @@ TEST_F(KernelLaunch, Runs2DDense) } }, dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2.get(), - static_cast *>(zero_dense2.get()), + static_cast*>(zero_dense2.get()), zero_dense2->get_const_values(), gko::kernels::omp::default_stride(zero_dense.get()), gko::kernels::omp::row_vector(vec_dense.get()), diff --git a/omp/test/factorization/par_ilu_kernels.cpp 
b/omp/test/factorization/par_ilu_kernels.cpp index 918a53cc979..f5606f37303 100644 --- a/omp/test/factorization/par_ilu_kernels.cpp +++ b/omp/test/factorization/par_ilu_kernels.cpp @@ -139,10 +139,10 @@ class ParIlu : public ::testing::Test { return mtx; } - void initialize_row_ptrs(index_type *l_row_ptrs_ref, - index_type *u_row_ptrs_ref, - index_type *l_row_ptrs_omp, - index_type *u_row_ptrs_omp) + void initialize_row_ptrs(index_type* l_row_ptrs_ref, + index_type* u_row_ptrs_ref, + index_type* l_row_ptrs_omp, + index_type* u_row_ptrs_omp) { gko::kernels::reference::factorization::initialize_row_ptrs_l_u( ref, gko::lend(csr_ref), l_row_ptrs_ref, u_row_ptrs_ref); @@ -150,8 +150,8 @@ class ParIlu : public ::testing::Test { omp, gko::lend(csr_omp), l_row_ptrs_omp, u_row_ptrs_omp); } - void initialize_lu(std::unique_ptr *l_ref, std::unique_ptr *u_ref, - std::unique_ptr *l_omp, std::unique_ptr *u_omp) + void initialize_lu(std::unique_ptr* l_ref, std::unique_ptr* u_ref, + std::unique_ptr* l_omp, std::unique_ptr* u_omp) { auto num_row_ptrs = csr_ref->get_size()[0] + 1; gko::Array l_row_ptrs_ref{ref, num_row_ptrs}; @@ -189,13 +189,13 @@ class ParIlu : public ::testing::Test { template static std::unique_ptr static_unique_ptr_cast( - std::unique_ptr &&from) + std::unique_ptr&& from) { - return std::unique_ptr{static_cast(from.release())}; + return std::unique_ptr{static_cast(from.release())}; } - void compute_lu(std::unique_ptr *l_ref, std::unique_ptr *u_ref, - std::unique_ptr *l_omp, std::unique_ptr *u_omp, + void compute_lu(std::unique_ptr* l_ref, std::unique_ptr* u_ref, + std::unique_ptr* l_omp, std::unique_ptr* u_omp, gko::size_type iterations = 0) { auto coo_ref = Coo::create(ref); diff --git a/omp/test/factorization/par_ilut_kernels.cpp b/omp/test/factorization/par_ilut_kernels.cpp index a0562e320e3..abac5fe49c0 100644 --- a/omp/test/factorization/par_ilut_kernels.cpp +++ b/omp/test/factorization/par_ilut_kernels.cpp @@ -172,8 +172,8 @@ class ParIlut : public 
::testing::Test { dmtx_ut_ani->copy_from(mtx_ut_ani.get()); } - void test_select(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, index_type rank, + void test_select(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, index_type rank, gko::remove_complex tolerance = 0.0) { auto size = index_type(mtx->get_num_stored_elements()); @@ -193,8 +193,8 @@ class ParIlut : public ::testing::Test { ASSERT_EQ(res, dres); } - void test_filter(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, + void test_filter(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, gko::remove_complex threshold, bool lower) { auto res = Csr::create(ref, mtx_size); @@ -219,8 +219,8 @@ class ParIlut : public ::testing::Test { GKO_ASSERT_MTX_EQ_SPARSITY(dres, dres_coo); } - void test_filter_approx(const std::unique_ptr &mtx, - const std::unique_ptr &dmtx, index_type rank) + void test_filter_approx(const std::unique_ptr& mtx, + const std::unique_ptr& dmtx, index_type rank) { auto res = Csr::create(ref, mtx_size); auto dres = Csr::create(omp, mtx_size); @@ -306,7 +306,7 @@ TYPED_TEST(ParIlut, KernelThresholdFilterNullptrCooIsEquivalentToRef) using Coo = typename TestFixture::Coo; auto res = Csr::create(this->ref, this->mtx_size); auto dres = Csr::create(this->omp, this->mtx_size); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::kernels::reference::par_ilut_factorization::threshold_filter( this->ref, this->mtx_l.get(), 0.5, res.get(), null_coo, true); @@ -363,7 +363,7 @@ TYPED_TEST(ParIlut, KernelThresholdFilterApproxNullptrCooIsEquivalentToRef) this->test_filter(this->mtx_l, this->dmtx_l, 0.5, true); auto res = Csr::create(this->ref, this->mtx_size); auto dres = Csr::create(this->omp, this->mtx_size); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::Array tmp(this->ref); gko::Array dtmp(this->omp); gko::remove_complex threshold{}; diff --git a/omp/test/matrix/dense_kernels.cpp b/omp/test/matrix/dense_kernels.cpp index 9335cb49f6e..345f7aa3211 100644 
--- a/omp/test/matrix/dense_kernels.cpp +++ b/omp/test/matrix/dense_kernels.cpp @@ -151,7 +151,7 @@ class Dense : public ::testing::Test { std::shuffle(tmp2.begin(), tmp2.end(), rng); std::vector tmp3(x->get_size()[0] / 10); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); - for (auto &i : tmp3) { + for (auto& i : tmp3) { i = row_dist(rng); } rpermute_idxs = @@ -163,7 +163,7 @@ class Dense : public ::testing::Test { } template - std::unique_ptr convert(InputType &&input) + std::unique_ptr convert(InputType&& input) { auto result = ConvertedType::create(input->get_executor()); input->convert_to(result.get()); @@ -695,8 +695,8 @@ TEST_F(Dense, IsTransposable) auto trans = x->transpose(); auto dtrans = dx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } @@ -727,8 +727,8 @@ TEST_F(Dense, IsConjugateTransposable) auto trans = c_x->conj_transpose(); auto dtrans = dc_x->conj_transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), - static_cast(trans.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(dtrans.get()), + static_cast(trans.get()), 0); } diff --git a/omp/test/matrix/diagonal_kernels.cpp b/omp/test/matrix/diagonal_kernels.cpp index 56d74398a82..fb184054187 100644 --- a/omp/test/matrix/diagonal_kernels.cpp +++ b/omp/test/matrix/diagonal_kernels.cpp @@ -237,9 +237,9 @@ TEST_F(Diagonal, ConjTransposeIsEquivalentToRef) set_up_complex_data(); auto trans = cdiag->conj_transpose(); - auto trans_diag = static_cast(trans.get()); + auto trans_diag = static_cast(trans.get()); auto dtrans = dcdiag->conj_transpose(); - auto dtrans_diag = static_cast(dtrans.get()); + auto dtrans_diag = static_cast(dtrans.get()); GKO_ASSERT_MTX_NEAR(trans_diag, dtrans_diag, 0); } diff --git a/omp/test/matrix/sparsity_csr_kernels.cpp b/omp/test/matrix/sparsity_csr_kernels.cpp index 730700ee8e7..1ad2ba91506 100644 --- 
a/omp/test/matrix/sparsity_csr_kernels.cpp +++ b/omp/test/matrix/sparsity_csr_kernels.cpp @@ -203,8 +203,8 @@ TEST_F(SparsityCsr, TransposeIsEquivalentToRef) auto trans = mtx->transpose(); auto d_trans = dmtx->transpose(); - GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), - static_cast(trans.get()), 0.0); + GKO_ASSERT_MTX_NEAR(static_cast(d_trans.get()), + static_cast(trans.get()), 0.0); } diff --git a/omp/test/multigrid/amgx_pgm_kernels.cpp b/omp/test/multigrid/amgx_pgm_kernels.cpp index 7ebd3fbb7a2..b1ef4374630 100644 --- a/omp/test/multigrid/amgx_pgm_kernels.cpp +++ b/omp/test/multigrid/amgx_pgm_kernels.cpp @@ -144,7 +144,7 @@ class AmgxPgm : public ::testing::Test { d_system_mtx = gko::clone(omp, system_mtx); } - void make_weight(Mtx *mtx) + void make_weight(Mtx* mtx) { gko::test::make_symmetric(mtx); // it is only works for real value case. diff --git a/omp/test/preconditioner/isai_kernels.cpp b/omp/test/preconditioner/isai_kernels.cpp index 631a72e7a24..1f812e8afe1 100644 --- a/omp/test/preconditioner/isai_kernels.cpp +++ b/omp/test/preconditioner/isai_kernels.cpp @@ -69,7 +69,7 @@ class Isai : public ::testing::Test { omp = gko::OmpExecutor::create(); } - std::unique_ptr clone_allocations(const Csr *csr_mtx) + std::unique_ptr clone_allocations(const Csr* csr_mtx) { if (csr_mtx->get_executor() != ref) { return {nullptr}; @@ -115,7 +115,7 @@ class Isai : public ::testing::Test { d_inverse = gko::clone(omp, inverse); } - void ensure_diagonal(Dense *mtx) + void ensure_diagonal(Dense* mtx) { for (int i = 0; i < mtx->get_size()[0]; ++i) { mtx->at(i, i) = gko::one(); diff --git a/omp/test/reorder/rcm_kernels.cpp b/omp/test/reorder/rcm_kernels.cpp index 18cd5850451..a31ba3f8bcf 100644 --- a/omp/test/reorder/rcm_kernels.cpp +++ b/omp/test/reorder/rcm_kernels.cpp @@ -76,8 +76,8 @@ class Rcm : public ::testing::Test { static void ubfs_reference( std::shared_ptr mtx, - i_type - *const levels, // Must be inf/max in all nodes connected to source + i_type* const + levels, 
// Must be inf/max in all nodes connected to source const i_type start) { const auto row_ptrs = mtx->get_const_row_ptrs(); @@ -110,7 +110,7 @@ class Rcm : public ::testing::Test { static bool is_valid_start_node(std::shared_ptr mtx, std::shared_ptr reorder, i_type start, - std::vector &already_visited) + std::vector& already_visited) { if (already_visited[start]) { return false; diff --git a/omp/test/solver/cb_gmres_kernels.cpp b/omp/test/solver/cb_gmres_kernels.cpp index e1cbe5b8e18..8b7477d2d01 100644 --- a/omp/test/solver/cb_gmres_kernels.cpp +++ b/omp/test/solver/cb_gmres_kernels.cpp @@ -97,7 +97,7 @@ class CbGmres : public ::testing::Test { Range3dHelper generate_krylov_helper(gko::dim<3> size) { auto helper = Range3dHelper{ref, size}; - auto &bases = helper.get_bases(); + auto& bases = helper.get_bases(); const auto num_rows = size[0] * size[1]; const auto num_cols = size[2]; auto temp_krylov_bases = gko::test::generate_random_matrix( @@ -193,7 +193,7 @@ class CbGmres : public ::testing::Test { void assert_krylov_bases_near() { gko::Array d_to_host{ref}; - auto &krylov_bases = range_helper.get_bases(); + auto& krylov_bases = range_helper.get_bases(); d_to_host = d_range_helper.get_bases(); const auto tolerance = r::value; using std::abs; diff --git a/reference/components/absolute_array.cpp b/reference/components/absolute_array.cpp index 287e4cfeb95..609f53aeecb 100644 --- a/reference/components/absolute_array.cpp +++ b/reference/components/absolute_array.cpp @@ -41,7 +41,7 @@ namespace components { template void inplace_absolute_array(std::shared_ptr exec, - ValueType *data, size_type n) + ValueType* data, size_type n) { for (size_type i = 0; i < n; i++) { data[i] = abs(data[i]); @@ -53,8 +53,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_ARRAY_KERNEL); template void outplace_absolute_array(std::shared_ptr exec, - const ValueType *in, size_type n, - remove_complex *out) + const ValueType* in, size_type n, + remove_complex* out) { for 
(size_type i = 0; i < n; i++) { out[i] = abs(in[i]); diff --git a/reference/components/convert_ptrs.hpp b/reference/components/convert_ptrs.hpp index ad59da923cc..66bbddea554 100644 --- a/reference/components/convert_ptrs.hpp +++ b/reference/components/convert_ptrs.hpp @@ -44,8 +44,8 @@ namespace csr { template -inline void convert_ptrs_to_idxs(const IndexType *ptrs, size_type num_rows, - IndexType *idxs) +inline void convert_ptrs_to_idxs(const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { size_type ind = 0; diff --git a/reference/components/csr_spgeam.hpp b/reference/components/csr_spgeam.hpp index c8149f9186f..32c70e32810 100644 --- a/reference/components/csr_spgeam.hpp +++ b/reference/components/csr_spgeam.hpp @@ -57,8 +57,8 @@ namespace reference { */ template -void abstract_spgeam(const matrix::Csr *a, - const matrix::Csr *b, +void abstract_spgeam(const matrix::Csr* a, + const matrix::Csr* b, BeginCallback begin_cb, EntryCallback entry_cb, EndCallback end_cb) { diff --git a/reference/components/fill_array.cpp b/reference/components/fill_array.cpp index b4c905f9e5d..d1ab70fdce3 100644 --- a/reference/components/fill_array.cpp +++ b/reference/components/fill_array.cpp @@ -43,7 +43,7 @@ namespace components { template -void fill_array(std::shared_ptr exec, ValueType *array, +void fill_array(std::shared_ptr exec, ValueType* array, size_type n, ValueType val) { std::fill_n(array, n, val); @@ -54,7 +54,7 @@ GKO_INSTANTIATE_FOR_EACH_TEMPLATE_TYPE(GKO_DECLARE_FILL_ARRAY_KERNEL); template void fill_seq_array(std::shared_ptr exec, - ValueType *array, size_type n) + ValueType* array, size_type n) { std::iota(array, array + n, 0); } diff --git a/reference/components/format_conversion.hpp b/reference/components/format_conversion.hpp index ebf24d3acd5..5a7fcd251d1 100644 --- a/reference/components/format_conversion.hpp +++ b/reference/components/format_conversion.hpp @@ -43,8 +43,8 @@ namespace reference { template -inline void convert_idxs_to_ptrs(const 
IndexType *idxs, size_type num_nonzeros, - IndexType *ptrs, size_type length) +inline void convert_idxs_to_ptrs(const IndexType* idxs, size_type num_nonzeros, + IndexType* ptrs, size_type length) { std::fill(ptrs, ptrs + length, 0); std::for_each(idxs, idxs + num_nonzeros, [&](size_type v) { @@ -57,8 +57,8 @@ inline void convert_idxs_to_ptrs(const IndexType *idxs, size_type num_nonzeros, template -inline void convert_ptrs_to_idxs(const IndexType *ptrs, size_type num_rows, - IndexType *idxs) +inline void convert_ptrs_to_idxs(const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { size_type ind = 0; diff --git a/reference/components/matrix_operations.hpp b/reference/components/matrix_operations.hpp index 97a639bf907..a7156dc6386 100644 --- a/reference/components/matrix_operations.hpp +++ b/reference/components/matrix_operations.hpp @@ -50,7 +50,7 @@ namespace reference { template remove_complex compute_inf_norm(size_type num_rows, size_type num_cols, - const ValueType *matrix, + const ValueType* matrix, size_type stride) { auto result = zero>(); diff --git a/reference/components/precision_conversion.cpp b/reference/components/precision_conversion.cpp index 7fcffeb8300..064581a3f71 100644 --- a/reference/components/precision_conversion.cpp +++ b/reference/components/precision_conversion.cpp @@ -44,7 +44,7 @@ namespace components { template void convert_precision(std::shared_ptr exec, - size_type size, const SourceType *in, TargetType *out) + size_type size, const SourceType* in, TargetType* out) { std::copy_n(in, size, out); } diff --git a/reference/components/prefix_sum.cpp b/reference/components/prefix_sum.cpp index 9cdf95bdd30..11836a9cf63 100644 --- a/reference/components/prefix_sum.cpp +++ b/reference/components/prefix_sum.cpp @@ -41,7 +41,7 @@ namespace components { template void prefix_sum(std::shared_ptr exec, - IndexType *counts, size_type num_entries) + IndexType* counts, size_type num_entries) { IndexType partial_sum{}; for (size_type i = 0; i < 
num_entries; ++i) { diff --git a/reference/factorization/factorization_kernels.cpp b/reference/factorization/factorization_kernels.cpp index dfae5dfe08e..650e0af93e7 100644 --- a/reference/factorization/factorization_kernels.cpp +++ b/reference/factorization/factorization_kernels.cpp @@ -58,8 +58,8 @@ namespace factorization { template size_type count_missing_elements(IndexType num_rows, IndexType num_cols, - const IndexType *col_idxs, - const IndexType *row_ptrs) + const IndexType* col_idxs, + const IndexType* row_ptrs) { size_type missing_elements{}; // if row >= num_cols, diagonal elements no longer exist @@ -82,7 +82,7 @@ size_type count_missing_elements(IndexType num_rows, IndexType num_cols, template void add_diagonal_elements(std::shared_ptr exec, - matrix::Csr *mtx, + matrix::Csr* mtx, bool /*is_sorted*/) { const auto values = mtx->get_const_values(); @@ -164,8 +164,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l_u( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs, IndexType *u_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs, IndexType* u_row_ptrs) { auto row_ptrs = system_matrix->get_const_row_ptrs(); auto col_idxs = system_matrix->get_const_col_idxs(); @@ -195,9 +195,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l_u(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, - matrix::Csr *csr_u) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, + matrix::Csr* csr_u) { const auto row_ptrs = system_matrix->get_const_row_ptrs(); const auto col_idxs = system_matrix->get_const_col_idxs(); @@ -250,8 +250,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l( std::shared_ptr exec, - const matrix::Csr *system_matrix, - IndexType *l_row_ptrs) + const matrix::Csr* system_matrix, + IndexType* l_row_ptrs) { auto row_ptrs = system_matrix->get_const_row_ptrs(); auto col_idxs = 
system_matrix->get_const_col_idxs(); @@ -276,8 +276,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_l(std::shared_ptr exec, - const matrix::Csr *system_matrix, - matrix::Csr *csr_l, bool diag_sqrt) + const matrix::Csr* system_matrix, + matrix::Csr* csr_l, bool diag_sqrt) { const auto row_ptrs = system_matrix->get_const_row_ptrs(); const auto col_idxs = system_matrix->get_const_col_idxs(); diff --git a/reference/factorization/ic_kernels.cpp b/reference/factorization/ic_kernels.cpp index fff0811ce92..6b990fdaa47 100644 --- a/reference/factorization/ic_kernels.cpp +++ b/reference/factorization/ic_kernels.cpp @@ -52,7 +52,7 @@ namespace ic_factorization { template void compute(std::shared_ptr exec, - matrix::Csr *m) + matrix::Csr* m) { vector diagonals{m->get_size()[0], -1, exec}; const auto row_ptrs = m->get_const_row_ptrs(); diff --git a/reference/factorization/ilu_kernels.cpp b/reference/factorization/ilu_kernels.cpp index 80c2e61098f..06a29d4b5d3 100644 --- a/reference/factorization/ilu_kernels.cpp +++ b/reference/factorization/ilu_kernels.cpp @@ -55,7 +55,7 @@ namespace ilu_factorization { template void compute_lu(std::shared_ptr exec, - matrix::Csr *m) + matrix::Csr* m) { vector diagonals{m->get_size()[0], -1, exec}; const auto row_ptrs = m->get_const_row_ptrs(); diff --git a/reference/factorization/par_ic_kernels.cpp b/reference/factorization/par_ic_kernels.cpp index 95cc8aa1cda..26d8790e1db 100644 --- a/reference/factorization/par_ic_kernels.cpp +++ b/reference/factorization/par_ic_kernels.cpp @@ -54,7 +54,7 @@ namespace par_ic_factorization { template void init_factor(std::shared_ptr exec, - matrix::Csr *l) + matrix::Csr* l) { auto num_rows = l->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -83,8 +83,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void compute_factor(std::shared_ptr exec, size_type /* num_iterations */, - const matrix::Coo *a_lower, - matrix::Csr *l) + const matrix::Coo* a_lower, + 
matrix::Csr* l) { auto num_rows = a_lower->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); diff --git a/reference/factorization/par_ict_kernels.cpp b/reference/factorization/par_ict_kernels.cpp index 399bd264b7a..847037e8edf 100644 --- a/reference/factorization/par_ict_kernels.cpp +++ b/reference/factorization/par_ict_kernels.cpp @@ -64,9 +64,9 @@ namespace par_ict_factorization { template void compute_factor(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo*) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -126,10 +126,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void add_candidates(std::shared_ptr exec, - const matrix::Csr *llh, - const matrix::Csr *a, - const matrix::Csr *l, - matrix::Csr *l_new) + const matrix::Csr* llh, + const matrix::Csr* a, + const matrix::Csr* l, + matrix::Csr* l_new) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -174,7 +174,7 @@ void add_candidates(std::shared_ptr exec, return state; }, [&](IndexType row, IndexType col, ValueType a_val, ValueType llh_val, - row_state &state) { + row_state& state) { auto r_val = a_val - llh_val; // load matching entry of L auto l_col = checked_load(l_col_idxs, state.l_old_begin, diff --git a/reference/factorization/par_ilu_kernels.cpp b/reference/factorization/par_ilu_kernels.cpp index 11d6341fb5f..87c7a02aeae 100644 --- a/reference/factorization/par_ilu_kernels.cpp +++ b/reference/factorization/par_ilu_kernels.cpp @@ -55,9 +55,9 @@ namespace par_ilu_factorization { template void compute_l_u_factors(std::shared_ptr exec, size_type iterations, - const matrix::Coo *system_matrix, - matrix::Csr *l_factor, - matrix::Csr *u_factor) + const matrix::Coo* system_matrix, + matrix::Csr* l_factor, + matrix::Csr* u_factor) { // If `iterations` is set to `Auto`, a single iteration is sufficient since // it is computed 
sequentially diff --git a/reference/factorization/par_ilut_kernels.cpp b/reference/factorization/par_ilut_kernels.cpp index 82cb34579df..39a37507573 100644 --- a/reference/factorization/par_ilut_kernels.cpp +++ b/reference/factorization/par_ilut_kernels.cpp @@ -71,10 +71,10 @@ namespace par_ilut_factorization { */ template void threshold_select(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - Array> &, - remove_complex &threshold) + const matrix::Csr* m, + IndexType rank, Array& tmp, + Array>&, + remove_complex& threshold) { auto values = m->get_const_values(); IndexType size = m->get_num_stored_elements(); @@ -101,9 +101,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void abstract_filter(std::shared_ptr exec, - const matrix::Csr *m, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, + const matrix::Csr* m, + matrix::Csr* m_out, + matrix::Coo* m_out_coo, Predicate pred) { auto num_rows = m->get_size()[0]; @@ -132,7 +132,7 @@ void abstract_filter(std::shared_ptr exec, builder.get_value_array().resize_and_reset(new_nnz); auto new_col_idxs = m_out->get_col_idxs(); auto new_vals = m_out->get_values(); - IndexType *new_row_idxs{}; + IndexType* new_row_idxs{}; if (m_out_coo) { matrix::CooBuilder coo_builder{m_out_coo}; coo_builder.get_row_idx_array().resize_and_reset(new_nnz); @@ -168,10 +168,10 @@ void abstract_filter(std::shared_ptr exec, */ template void threshold_filter(std::shared_ptr exec, - const matrix::Csr *m, + const matrix::Csr* m, remove_complex threshold, - matrix::Csr *m_out, - matrix::Coo *m_out_coo, bool) + matrix::Csr* m_out, + matrix::Coo* m_out_coo, bool) { auto col_idxs = m->get_const_col_idxs(); auto vals = m->get_const_values(); @@ -197,11 +197,11 @@ constexpr auto sample_size = bucket_count * sampleselect_oversampling; */ template void threshold_filter_approx(std::shared_ptr exec, - const matrix::Csr *m, - IndexType rank, Array &tmp, - remove_complex &threshold, - matrix::Csr *m_out, - matrix::Coo 
*m_out_coo) + const matrix::Csr* m, + IndexType rank, Array& tmp, + remove_complex& threshold, + matrix::Csr* m_out, + matrix::Coo* m_out_coo) { auto vals = m->get_const_values(); auto col_idxs = m->get_const_col_idxs(); @@ -212,7 +212,7 @@ void threshold_filter_approx(std::shared_ptr exec, sizeof(ValueType)); tmp.resize_and_reset(storage_size); // pick and sort sample - auto sample = reinterpret_cast(tmp.get_data()); + auto sample = reinterpret_cast(tmp.get_data()); // assuming rounding towards zero auto stride = double(size) / sample_size; for (IndexType i = 0; i < sample_size; ++i) { @@ -225,7 +225,7 @@ void threshold_filter_approx(std::shared_ptr exec, sample[i] = sample[(i + 1) * sampleselect_oversampling]; } // count elements per bucket - auto histogram = reinterpret_cast(sample + bucket_count); + auto histogram = reinterpret_cast(sample + bucket_count); for (IndexType bucket = 0; bucket < bucket_count; ++bucket) { histogram[bucket] = 0; } @@ -263,12 +263,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void compute_l_u_factors(std::shared_ptr exec, - const matrix::Csr *a, - matrix::Csr *l, - const matrix::Coo *, - matrix::Csr *u, - const matrix::Coo *, - matrix::Csr *u_csc) + const matrix::Csr* a, + matrix::Csr* l, + const matrix::Coo*, + matrix::Csr* u, + const matrix::Coo*, + matrix::Csr* u_csc) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -353,12 +353,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void add_candidates(std::shared_ptr exec, - const matrix::Csr *lu, - const matrix::Csr *a, - const matrix::Csr *l, - const matrix::Csr *u, - matrix::Csr *l_new, - matrix::Csr *u_new) + const matrix::Csr* lu, + const matrix::Csr* a, + const matrix::Csr* l, + const matrix::Csr* u, + matrix::Csr* l_new, + matrix::Csr* u_new) { auto num_rows = a->get_size()[0]; auto l_row_ptrs = l->get_const_row_ptrs(); @@ -424,7 +424,7 @@ void add_candidates(std::shared_ptr exec, return state; }, [&](IndexType 
row, IndexType col, ValueType a_val, ValueType lu_val, - row_state &state) { + row_state& state) { auto r_val = a_val - lu_val; // load matching entry of L + U auto lpu_col = state.finished_l diff --git a/reference/matrix/coo_kernels.cpp b/reference/matrix/coo_kernels.cpp index b695d266fe3..c3a68d21364 100644 --- a/reference/matrix/coo_kernels.cpp +++ b/reference/matrix/coo_kernels.cpp @@ -61,8 +61,8 @@ namespace coo { template void spmv(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { dense::fill(exec, c, zero()); spmv2(exec, a, b, c); @@ -73,11 +73,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { dense::scale(exec, beta, c); advanced_spmv2(exec, alpha, a, b, c); @@ -89,8 +89,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spmv2(std::shared_ptr exec, - const matrix::Coo *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Coo* a, + const matrix::Dense* b, matrix::Dense* c) { auto coo_val = a->get_const_values(); auto coo_col = a->get_const_col_idxs(); @@ -108,10 +108,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template void advanced_spmv2(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Coo *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Coo* a, + const matrix::Dense* b, + matrix::Dense* c) { auto coo_val = a->get_const_values(); auto coo_col = a->get_const_col_idxs(); @@ -132,8 +132,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void 
convert_row_idxs_to_ptrs(std::shared_ptr exec, - const IndexType *idxs, size_type num_nonzeros, - IndexType *ptrs, size_type length) + const IndexType* idxs, size_type num_nonzeros, + IndexType* ptrs, size_type length) { convert_idxs_to_ptrs(idxs, num_nonzeros, ptrs, length); } @@ -141,8 +141,8 @@ void convert_row_idxs_to_ptrs(std::shared_ptr exec, template void convert_to_csr(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Csr *result) + const matrix::Coo* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; @@ -161,8 +161,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Coo *source, - matrix::Dense *result) + const matrix::Coo* source, + matrix::Dense* result) { auto coo_val = source->get_const_values(); auto coo_col = source->get_const_col_idxs(); @@ -185,8 +185,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Coo *orig, - matrix::Diagonal *diag) + const matrix::Coo* orig, + matrix::Diagonal* diag) { const auto row_idxs = orig->get_const_row_idxs(); const auto col_idxs = orig->get_const_col_idxs(); diff --git a/reference/matrix/csr_kernels.cpp b/reference/matrix/csr_kernels.cpp index ec7ac790c7b..37d8b064ee5 100644 --- a/reference/matrix/csr_kernels.cpp +++ b/reference/matrix/csr_kernels.cpp @@ -70,8 +70,8 @@ namespace csr { template void spmv(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Csr* a, + const matrix::Dense* b, matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -97,11 +97,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const 
matrix::Csr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -129,8 +129,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void spgemm_insert_row(unordered_set &cols, - const matrix::Csr *c, +void spgemm_insert_row(unordered_set& cols, + const matrix::Csr* c, size_type row) { auto row_ptrs = c->get_const_row_ptrs(); @@ -140,9 +140,9 @@ void spgemm_insert_row(unordered_set &cols, template -void spgemm_insert_row2(unordered_set &cols, - const matrix::Csr *a, - const matrix::Csr *b, +void spgemm_insert_row2(unordered_set& cols, + const matrix::Csr* a, + const matrix::Csr* b, size_type row) { auto a_row_ptrs = a->get_const_row_ptrs(); @@ -160,8 +160,8 @@ void spgemm_insert_row2(unordered_set &cols, template -void spgemm_accumulate_row(map &cols, - const matrix::Csr *c, +void spgemm_accumulate_row(map& cols, + const matrix::Csr* c, ValueType scale, size_type row) { auto row_ptrs = c->get_const_row_ptrs(); @@ -177,9 +177,9 @@ void spgemm_accumulate_row(map &cols, template -void spgemm_accumulate_row2(map &cols, - const matrix::Csr *a, - const matrix::Csr *b, +void spgemm_accumulate_row2(map& cols, + const matrix::Csr* a, + const matrix::Csr* b, ValueType scale, size_type row) { auto a_row_ptrs = a->get_const_row_ptrs(); @@ -205,9 +205,9 @@ void spgemm_accumulate_row2(map &cols, template void spgemm(std::shared_ptr exec, - const matrix::Csr *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Csr* a, + const matrix::Csr* b, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; @@ -227,8 +227,8 @@ void spgemm(std::shared_ptr exec, // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = 
c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); @@ -253,12 +253,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template void advanced_spgemm(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Csr *b, - const matrix::Dense *beta, - const matrix::Csr *d, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Csr* b, + const matrix::Dense* beta, + const matrix::Csr* d, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; auto valpha = alpha->at(0, 0); @@ -281,8 +281,8 @@ void advanced_spgemm(std::shared_ptr exec, // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); @@ -309,11 +309,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void spgeam(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Csr *a, - const matrix::Dense *beta, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Dense* alpha, + const matrix::Csr* a, + const matrix::Dense* beta, + const matrix::Csr* b, + matrix::Csr* c) { auto num_rows = a->get_size()[0]; auto valpha = alpha->at(0, 0); @@ -324,7 +324,7 @@ void spgeam(std::shared_ptr exec, abstract_spgeam( a, b, [](IndexType) { return IndexType{}; }, - [](IndexType, IndexType, ValueType, ValueType, IndexType &nnz) { + [](IndexType, IndexType, ValueType, ValueType, IndexType& nnz) { ++nnz; }, [&](IndexType row, IndexType nnz) { c_row_ptrs[row] = nnz; }); @@ -335,8 +335,8 @@ void spgeam(std::shared_ptr 
exec, // second sweep: accumulate non-zeros auto new_nnz = c_row_ptrs[num_rows]; matrix::CsrBuilder c_builder{c}; - auto &c_col_idxs_array = c_builder.get_col_idx_array(); - auto &c_vals_array = c_builder.get_value_array(); + auto& c_col_idxs_array = c_builder.get_col_idx_array(); + auto& c_vals_array = c_builder.get_value_array(); c_col_idxs_array.resize_and_reset(new_nnz); c_vals_array.resize_and_reset(new_nnz); auto c_col_idxs = c_col_idxs_array.get_data(); @@ -345,7 +345,7 @@ void spgeam(std::shared_ptr exec, abstract_spgeam( a, b, [&](IndexType row) { return c_row_ptrs[row]; }, [&](IndexType, IndexType col, ValueType a_val, ValueType b_val, - IndexType &nz) { + IndexType& nz) { c_vals[nz] = valpha * a_val + vbeta * b_val; c_col_idxs[nz] = col; ++nz; @@ -358,8 +358,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template void convert_row_ptrs_to_idxs(std::shared_ptr exec, - const IndexType *ptrs, size_type num_rows, - IndexType *idxs) + const IndexType* ptrs, size_type num_rows, + IndexType* idxs) { convert_ptrs_to_idxs(ptrs, num_rows, idxs); } @@ -367,8 +367,8 @@ void convert_row_ptrs_to_idxs(std::shared_ptr exec, template void convert_to_coo(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Coo *result) + const matrix::Csr* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; @@ -383,8 +383,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Dense *result) + const matrix::Csr* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -409,8 +409,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Sellp *result) + const matrix::Csr* source, + matrix::Sellp* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -484,8 
+484,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result, size_type stride_factor, + const matrix::Csr* source, + size_type* result, size_type stride_factor, size_type slice_size) { size_type total_cols = 0; @@ -516,8 +516,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Ell *result) + const matrix::Csr* source, + matrix::Ell* result) { const auto num_rows = source->get_size()[0]; const auto num_cols = source->get_size()[1]; @@ -546,10 +546,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -inline void convert_csr_to_csc(size_type num_rows, const IndexType *row_ptrs, - const IndexType *col_idxs, - const ValueType *csr_vals, IndexType *row_idxs, - IndexType *col_ptrs, ValueType *csc_vals, +inline void convert_csr_to_csc(size_type num_rows, const IndexType* row_ptrs, + const IndexType* col_idxs, + const ValueType* csr_vals, IndexType* row_idxs, + IndexType* col_ptrs, ValueType* csc_vals, UnaryOperator op) { for (size_type row = 0; row < num_rows; ++row) { @@ -564,8 +564,8 @@ inline void convert_csr_to_csc(size_type num_rows, const IndexType *row_ptrs, template void transpose_and_transform(std::shared_ptr exec, - matrix::Csr *trans, - const matrix::Csr *orig, + matrix::Csr* trans, + const matrix::Csr* orig, UnaryOperator op) { auto trans_row_ptrs = trans->get_row_ptrs(); @@ -590,8 +590,8 @@ void transpose_and_transform(std::shared_ptr exec, template void transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) + const matrix::Csr* orig, + matrix::Csr* trans) { transpose_and_transform(exec, trans, orig, [](const ValueType x) { return x; }); @@ -602,8 +602,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Csr *trans) 
+ const matrix::Csr* orig, + matrix::Csr* trans) { transpose_and_transform(exec, trans, orig, [](const ValueType x) { return conj(x); }); @@ -615,8 +615,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Csr *source, - size_type *result) + const matrix::Csr* source, + size_type* result) { const auto num_rows = source->get_size()[0]; const auto row_ptrs = source->get_const_row_ptrs(); @@ -635,8 +635,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - const matrix::Csr *source, - matrix::Hybrid *result) + const matrix::Csr* source, + matrix::Hybrid* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -691,8 +691,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void invert_permutation(std::shared_ptr exec, - size_type size, const IndexType *permutation_indices, - IndexType *inv_permutation) + size_type size, const IndexType* permutation_indices, + IndexType* inv_permutation) { for (IndexType i = 0; i < static_cast(size); ++i) { inv_permutation[permutation_indices[i]] = i; @@ -704,9 +704,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_INVERT_PERMUTATION_KERNEL); template void inv_symm_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -741,9 +741,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -776,9 +776,9 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *row_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* row_permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -811,9 +811,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute(std::shared_ptr exec, - const IndexType *perm, - const matrix::Csr *orig, - matrix::Csr *column_permuted) + const IndexType* perm, + const matrix::Csr* orig, + matrix::Csr* column_permuted) { auto in_row_ptrs = orig->get_const_row_ptrs(); auto in_col_idxs = orig->get_const_col_idxs(); @@ -841,8 +841,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Csr *source, - Array *result) + const matrix::Csr* source, + Array* result) { const auto row_ptrs = source->get_const_row_ptrs(); auto row_nnz_val = result->get_data(); @@ -857,7 +857,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::Csr *to_sort) + matrix::Csr* to_sort) { auto values = to_sort->get_values(); auto row_ptrs = to_sort->get_row_ptrs(); @@ -879,7 +879,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::Csr *to_check, bool *is_sorted) + const matrix::Csr* to_check, bool* is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); @@ -902,8 +902,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Csr *orig, - matrix::Diagonal *diag) + const matrix::Csr* orig, + matrix::Diagonal* diag) { const auto row_ptrs = orig->get_const_row_ptrs(); const auto col_idxs = orig->get_const_col_idxs(); diff --git 
a/reference/matrix/dense_kernels.cpp b/reference/matrix/dense_kernels.cpp index b092b376763..d9a1bfd3887 100644 --- a/reference/matrix/dense_kernels.cpp +++ b/reference/matrix/dense_kernels.cpp @@ -61,9 +61,9 @@ namespace dense { template void simple_apply(std::shared_ptr exec, - const matrix::Dense *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Dense* a, + const matrix::Dense* b, + matrix::Dense* c) { for (size_type row = 0; row < c->get_size()[0]; ++row) { for (size_type col = 0; col < c->get_size()[1]; ++col) { @@ -85,9 +85,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SIMPLE_APPLY_KERNEL); template void apply(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *a, const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Dense* a, const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* c) { if (beta->at(0, 0) != zero()) { for (size_type row = 0; row < c->get_size()[0]; ++row) { @@ -118,8 +118,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_APPLY_KERNEL); template void copy(std::shared_ptr exec, - const matrix::Dense *input, - matrix::Dense *output) + const matrix::Dense* input, + matrix::Dense* output) { for (size_type row = 0; row < input->get_size()[0]; ++row) { for (size_type col = 0; col < input->get_size()[1]; ++col) { @@ -135,7 +135,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_CONVERSION_OR_COPY( template void fill(std::shared_ptr exec, - matrix::Dense *mat, ValueType value) + matrix::Dense* mat, ValueType value) { for (size_type row = 0; row < mat->get_size()[0]; ++row) { for (size_type col = 0; col < mat->get_size()[1]; ++col) { @@ -149,7 +149,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_FILL_KERNEL); template void scale(std::shared_ptr exec, - const matrix::Dense *alpha, matrix::Dense *x) + const matrix::Dense* alpha, matrix::Dense* x) { if (alpha->get_size()[1] == 1) { for (size_type i = 0; i < x->get_size()[0]; 
++i) { @@ -171,8 +171,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(GKO_DECLARE_DENSE_SCALE_KERNEL); template void inv_scale(std::shared_ptr exec, - const matrix::Dense *alpha, - matrix::Dense *x) + const matrix::Dense* alpha, + matrix::Dense* x) { if (alpha->get_size()[1] == 1) { for (size_type i = 0; i < x->get_size()[0]; ++i) { @@ -195,8 +195,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void add_scaled(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *x, matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Dense* x, matrix::Dense* y) { if (alpha->get_size()[1] == 1) { for (size_type i = 0; i < x->get_size()[0]; ++i) { @@ -219,8 +219,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void sub_scaled(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Dense *x, matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Dense* x, matrix::Dense* y) { if (alpha->get_size()[1] == 1) { for (size_type i = 0; i < x->get_size()[0]; ++i) { @@ -243,9 +243,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template void add_scaled_diag(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Diagonal *x, - matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Diagonal* x, + matrix::Dense* y) { const auto diag_values = x->get_const_values(); for (size_type i = 0; i < x->get_size()[0]; i++) { @@ -258,9 +258,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_ADD_SCALED_DIAG_KERNEL); template void sub_scaled_diag(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Diagonal *x, - matrix::Dense *y) + const matrix::Dense* alpha, + const matrix::Diagonal* x, + matrix::Dense* y) { const auto diag_values = x->get_const_values(); for (size_type i = 0; i < x->get_size()[0]; i++) { @@ -273,9 +273,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_SUB_SCALED_DIAG_KERNEL); template void compute_dot(std::shared_ptr exec, - const matrix::Dense *x, 
- const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { for (size_type j = 0; j < x->get_size()[1]; ++j) { result->at(0, j) = zero(); @@ -292,9 +292,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_DOT_KERNEL); template void compute_conj_dot(std::shared_ptr exec, - const matrix::Dense *x, - const matrix::Dense *y, - matrix::Dense *result) + const matrix::Dense* x, + const matrix::Dense* y, + matrix::Dense* result) { for (size_type j = 0; j < x->get_size()[1]; ++j) { result->at(0, j) = zero(); @@ -311,8 +311,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_CONJ_DOT_KERNEL); template void compute_norm2(std::shared_ptr exec, - const matrix::Dense *x, - matrix::Dense> *result) + const matrix::Dense* x, + matrix::Dense>* result) { for (size_type j = 0; j < x->get_size()[1]; ++j) { result->at(0, j) = zero>(); @@ -332,8 +332,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COMPUTE_NORM2_KERNEL); template void convert_to_coo(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Coo *result) + const matrix::Dense* source, + matrix::Coo* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -363,8 +363,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Csr *result) + const matrix::Dense* source, + matrix::Csr* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -395,8 +395,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_ell(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Ell *result) + const matrix::Dense* source, + matrix::Ell* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -427,8 +427,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_hybrid(std::shared_ptr exec, - 
const matrix::Dense *source, - matrix::Hybrid *result) + const matrix::Dense* source, + matrix::Hybrid* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -482,8 +482,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sellp(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Sellp *result) + const matrix::Dense* source, + matrix::Sellp* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -556,8 +556,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_sparsity_csr(std::shared_ptr exec, - const matrix::Dense *source, - matrix::SparsityCsr *result) + const matrix::Dense* source, + matrix::SparsityCsr* result) { auto num_rows = result->get_size()[0]; auto num_cols = result->get_size()[1]; @@ -586,7 +586,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Dense *source, size_type *result) + const matrix::Dense* source, size_type* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -606,8 +606,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_COUNT_NONZEROS_KERNEL); template void calculate_max_nnz_per_row(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result) + const matrix::Dense* source, + size_type* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -631,8 +631,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Dense *source, - Array *result) + const matrix::Dense* source, + Array* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -652,8 +652,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void calculate_total_cols(std::shared_ptr exec, - const matrix::Dense *source, - size_type *result, size_type stride_factor, + const matrix::Dense* source, + 
size_type* result, size_type stride_factor, size_type slice_size) { auto num_rows = source->get_size()[0]; @@ -685,8 +685,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { for (size_type i = 0; i < orig->get_size()[0]; ++i) { for (size_type j = 0; j < orig->get_size()[1]; ++j) { @@ -700,8 +700,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_TRANSPOSE_KERNEL); template void conj_transpose(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Dense *trans) + const matrix::Dense* orig, + matrix::Dense* trans) { for (size_type i = 0; i < orig->get_size()[0]; ++i) { for (size_type j = 0; j < orig->get_size()[1]; ++j) { @@ -715,9 +715,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_CONJ_TRANSPOSE_KERNEL); template void symm_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* permuted) { auto perm = permutation_indices->get_const_data(); auto size = orig->get_size()[0]; @@ -734,9 +734,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inv_symm_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* permuted) { auto perm = permutation_indices->get_const_data(); auto size = orig->get_size()[0]; @@ -753,9 +753,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void row_gather(std::shared_ptr exec, - const Array *row_indices, - const matrix::Dense *orig, - matrix::Dense *row_gathered) + const Array* row_indices, + const matrix::Dense* orig, + matrix::Dense* row_gathered) { auto rows = row_indices->get_const_data(); for (size_type i = 0; i < row_indices->get_num_elems(); ++i) { @@ 
-771,9 +771,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* column_permuted) { auto perm = permutation_indices->get_const_data(); for (size_type j = 0; j < orig->get_size()[1]; ++j) { @@ -789,9 +789,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_row_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *row_permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* row_permuted) { auto perm = permutation_indices->get_const_data(); for (size_type i = 0; i < orig->get_size()[0]; ++i) { @@ -807,9 +807,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void inverse_column_permute(std::shared_ptr exec, - const Array *permutation_indices, - const matrix::Dense *orig, - matrix::Dense *column_permuted) + const Array* permutation_indices, + const matrix::Dense* orig, + matrix::Dense* column_permuted) { auto perm = permutation_indices->get_const_data(); for (size_type j = 0; j < orig->get_size()[1]; ++j) { @@ -825,8 +825,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Dense *orig, - matrix::Diagonal *diag) + const matrix::Dense* orig, + matrix::Diagonal* diag) { auto diag_values = diag->get_values(); for (size_type i = 0; i < diag->get_size()[0]; ++i) { @@ -839,7 +839,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DENSE_EXTRACT_DIAGONAL_KERNEL); template void inplace_absolute_dense(std::shared_ptr exec, - matrix::Dense *source) + matrix::Dense* source) { auto dim = source->get_size(); for (size_type row = 0; row < dim[0]; row++) { @@ -854,8 +854,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_INPLACE_ABSOLUTE_DENSE_KERNEL); template 
void outplace_absolute_dense(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { auto dim = source->get_size(); for (size_type row = 0; row < dim[0]; row++) { @@ -870,8 +870,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_OUTPLACE_ABSOLUTE_DENSE_KERNEL); template void make_complex(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { auto dim = source->get_size(); for (size_type row = 0; row < dim[0]; row++) { @@ -886,8 +886,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_MAKE_COMPLEX_KERNEL); template void get_real(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { auto dim = source->get_size(); for (size_type row = 0; row < dim[0]; row++) { @@ -902,8 +902,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GET_REAL_KERNEL); template void get_imag(std::shared_ptr exec, - const matrix::Dense *source, - matrix::Dense> *result) + const matrix::Dense* source, + matrix::Dense>* result) { auto dim = source->get_size(); for (size_type row = 0; row < dim[0]; row++) { diff --git a/reference/matrix/diagonal_kernels.cpp b/reference/matrix/diagonal_kernels.cpp index 1a832c68468..ea92210170c 100644 --- a/reference/matrix/diagonal_kernels.cpp +++ b/reference/matrix/diagonal_kernels.cpp @@ -50,9 +50,9 @@ namespace diagonal { template void apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Diagonal* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto diag_values = a->get_const_values(); for (size_type row = 0; row < a->get_size()[0]; row++) { @@ -68,9 +68,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_DIAGONAL_APPLY_TO_DENSE_KERNEL); template void right_apply_to_dense(std::shared_ptr exec, - const matrix::Diagonal *a, - const 
matrix::Dense *b, - matrix::Dense *c) + const matrix::Diagonal* a, + const matrix::Dense* b, + matrix::Dense* c) { const auto diag_values = a->get_const_values(); for (size_type row = 0; row < b->get_size()[0]; row++) { @@ -86,9 +86,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { const auto diag_values = a->get_const_values(); c->copy_from(b); @@ -110,9 +110,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void right_apply_to_csr(std::shared_ptr exec, - const matrix::Diagonal *a, - const matrix::Csr *b, - matrix::Csr *c) + const matrix::Diagonal* a, + const matrix::Csr* b, + matrix::Csr* c) { const auto diag_values = a->get_const_values(); c->copy_from(b); @@ -134,8 +134,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Diagonal *source, - matrix::Csr *result) + const matrix::Diagonal* source, + matrix::Csr* result) { const auto size = source->get_size()[0]; auto row_ptrs = result->get_row_ptrs(); @@ -157,8 +157,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr exec, - const matrix::Diagonal *orig, - matrix::Diagonal *trans) + const matrix::Diagonal* orig, + matrix::Diagonal* trans) { const auto size = orig->get_size()[0]; const auto orig_values = orig->get_const_values(); diff --git a/reference/matrix/ell_kernels.cpp b/reference/matrix/ell_kernels.cpp index 24f166e52db..9b869e9ded8 100644 --- a/reference/matrix/ell_kernels.cpp +++ b/reference/matrix/ell_kernels.cpp @@ -57,9 +57,9 @@ namespace ell { template void spmv(std::shared_ptr exec, - const matrix::Ell *a, - const matrix::Dense *b, - matrix::Dense *c) + const matrix::Ell* a, + const matrix::Dense* b, + matrix::Dense* c) { using a_accessor = gko::acc::reduced_row_major<1, OutputValueType, const 
MatrixValueType>; @@ -97,11 +97,11 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Ell *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Ell* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { using a_accessor = gko::acc::reduced_row_major<1, OutputValueType, const MatrixValueType>; @@ -140,8 +140,8 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Dense *result) + const matrix::Ell* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -164,8 +164,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Ell *source, - matrix::Csr *result) + const matrix::Ell* source, + matrix::Csr* result) { const auto num_rows = source->get_size()[0]; const auto max_nnz_per_row = source->get_num_stored_elements_per_row(); @@ -196,8 +196,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Ell *source, - size_type *result) + const matrix::Ell* source, + size_type* result) { size_type nonzeros = 0; const auto num_rows = source->get_size()[0]; @@ -219,8 +219,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row(std::shared_ptr exec, - const matrix::Ell *source, - Array *result) + const matrix::Ell* source, + Array* result) { const auto num_rows = source->get_size()[0]; const auto max_nnz_per_row = source->get_num_stored_elements_per_row(); @@ -244,8 +244,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Ell *orig, - matrix::Diagonal *diag) + const matrix::Ell* orig, + matrix::Diagonal* 
diag) { const auto col_idxs = orig->get_const_col_idxs(); const auto values = orig->get_const_values(); diff --git a/reference/matrix/fbcsr_kernels.cpp b/reference/matrix/fbcsr_kernels.cpp index 35c0bcbbf8b..a7fbed91a78 100644 --- a/reference/matrix/fbcsr_kernels.cpp +++ b/reference/matrix/fbcsr_kernels.cpp @@ -67,9 +67,9 @@ namespace fbcsr { template void spmv(const std::shared_ptr, - const matrix::Fbcsr *const a, - const matrix::Dense *const b, - matrix::Dense *const c) + const matrix::Fbcsr* const a, + const matrix::Dense* const b, + matrix::Dense* const c) { const int bs = a->get_block_size(); const auto nvecs = static_cast(b->get_size()[1]); @@ -109,11 +109,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template void advanced_spmv(const std::shared_ptr, - const matrix::Dense *const alpha, - const matrix::Fbcsr *const a, - const matrix::Dense *const b, - const matrix::Dense *const beta, - matrix::Dense *const c) + const matrix::Dense* const alpha, + const matrix::Fbcsr* const a, + const matrix::Dense* const b, + const matrix::Dense* const beta, + matrix::Dense* const c) { const int bs = a->get_block_size(); const auto nvecs = static_cast(b->get_size()[1]); @@ -155,15 +155,15 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(const std::shared_ptr, - const matrix::Fbcsr *const source, - matrix::Dense *const result) + const matrix::Fbcsr* const source, + matrix::Dense* const result) { const int bs = source->get_block_size(); const IndexType nbrows = source->get_num_block_rows(); const IndexType nbcols = source->get_num_block_cols(); - const IndexType *const row_ptrs = source->get_const_row_ptrs(); - const IndexType *const col_idxs = source->get_const_col_idxs(); - const ValueType *const vals = source->get_const_values(); + const IndexType* const row_ptrs = source->get_const_row_ptrs(); + const IndexType* const col_idxs = source->get_const_col_idxs(); + const ValueType* const vals = 
source->get_const_values(); const acc::range> values{ std::array{source->get_num_stored_blocks(), @@ -199,24 +199,24 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(const std::shared_ptr, - const matrix::Fbcsr *const source, - matrix::Csr *const result) + const matrix::Fbcsr* const source, + matrix::Csr* const result) { const int bs = source->get_block_size(); const IndexType nbrows = source->get_num_block_rows(); const IndexType nbcols = source->get_num_block_cols(); - const IndexType *const browptrs = source->get_const_row_ptrs(); - const IndexType *const bcolinds = source->get_const_col_idxs(); - const ValueType *const bvals = source->get_const_values(); + const IndexType* const browptrs = source->get_const_row_ptrs(); + const IndexType* const bcolinds = source->get_const_col_idxs(); + const ValueType* const bvals = source->get_const_values(); assert(nbrows * bs == result->get_size()[0]); assert(nbcols * bs == result->get_size()[1]); assert(source->get_num_stored_elements() == result->get_num_stored_elements()); - IndexType *const row_ptrs = result->get_row_ptrs(); - IndexType *const col_idxs = result->get_col_idxs(); - ValueType *const vals = result->get_values(); + IndexType* const row_ptrs = result->get_row_ptrs(); + IndexType* const col_idxs = result->get_col_idxs(); + ValueType* const vals = result->get_values(); const acc::range> bvalues{ std::array{source->get_num_stored_blocks(), @@ -259,12 +259,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - const ValueType *const fbcsr_vals, - IndexType *const row_idxs, - IndexType *const col_ptrs, - ValueType *const csc_vals, UnaryOperator op) + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const ValueType* const fbcsr_vals, + IndexType* const row_idxs, + IndexType* const col_ptrs, + ValueType* const 
csc_vals, UnaryOperator op) { const acc::range> rvalues{ std::array{static_cast(row_ptrs[num_blk_rows]), @@ -295,8 +295,8 @@ void convert_fbcsr_to_fbcsc(const IndexType num_blk_rows, const int blksz, template void transpose_and_transform( - matrix::Fbcsr *const trans, - const matrix::Fbcsr *const orig, UnaryOperator op) + matrix::Fbcsr* const trans, + const matrix::Fbcsr* const orig, UnaryOperator op) { const int bs = orig->get_block_size(); auto trans_row_ptrs = trans->get_row_ptrs(); @@ -321,8 +321,8 @@ void transpose_and_transform( template void transpose(std::shared_ptr, - const matrix::Fbcsr *const orig, - matrix::Fbcsr *const trans) + const matrix::Fbcsr* const orig, + matrix::Fbcsr* const trans) { transpose_and_transform(trans, orig, [](const ValueType x) { return x; }); } @@ -333,8 +333,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose(std::shared_ptr, - const matrix::Fbcsr *const orig, - matrix::Fbcsr *const trans) + const matrix::Fbcsr* const orig, + matrix::Fbcsr* const trans) { transpose_and_transform(trans, orig, [](const ValueType x) { return conj(x); }); @@ -347,8 +347,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_max_nnz_per_row( std::shared_ptr, - const matrix::Fbcsr *const source, - size_type *const result) + const matrix::Fbcsr* const source, + size_type* const result) { const auto num_rows = source->get_size()[0]; const auto row_ptrs = source->get_const_row_ptrs(); @@ -371,7 +371,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void calculate_nonzeros_per_row( std::shared_ptr, - const matrix::Fbcsr *source, Array *result) + const matrix::Fbcsr* source, Array* result) { const auto row_ptrs = source->get_const_row_ptrs(); auto row_nnz_val = result->get_data(); @@ -391,8 +391,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr, - const matrix::Fbcsr *const to_check, - bool *const is_sorted) + const matrix::Fbcsr* const 
to_check, + bool* const is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); @@ -416,7 +416,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template static void sort_by_column_index_impl( - matrix::Fbcsr *const to_sort) + matrix::Fbcsr* const to_sort) { auto row_ptrs = to_sort->get_const_row_ptrs(); auto col_idxs = to_sort->get_col_idxs(); @@ -424,8 +424,8 @@ static void sort_by_column_index_impl( const auto nbrows = to_sort->get_num_block_rows(); constexpr int bs2 = mat_blk_sz * mat_blk_sz; for (IndexType i = 0; i < nbrows; ++i) { - IndexType *const brow_col_idxs = col_idxs + row_ptrs[i]; - ValueType *const brow_vals = values + row_ptrs[i] * bs2; + IndexType* const brow_col_idxs = col_idxs + row_ptrs[i]; + ValueType* const brow_vals = values + row_ptrs[i] * bs2; const IndexType nbnz_brow = row_ptrs[i + 1] - row_ptrs[i]; std::vector col_permute(nbnz_brow); @@ -447,7 +447,7 @@ static void sort_by_column_index_impl( template void sort_by_column_index(const std::shared_ptr exec, - matrix::Fbcsr *const to_sort) + matrix::Fbcsr* const to_sort) { const int bs = to_sort->get_block_size(); if (bs == 2) { @@ -467,8 +467,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr, - const matrix::Fbcsr *const orig, - matrix::Diagonal *const diag) + const matrix::Fbcsr* const orig, + matrix::Diagonal* const diag) { const auto row_ptrs = orig->get_const_row_ptrs(); const auto col_idxs = orig->get_const_col_idxs(); diff --git a/reference/matrix/hybrid_kernels.cpp b/reference/matrix/hybrid_kernels.cpp index 7bb5b4d512d..60ae4357497 100644 --- a/reference/matrix/hybrid_kernels.cpp +++ b/reference/matrix/hybrid_kernels.cpp @@ -58,8 +58,8 @@ namespace hybrid { template void convert_to_dense(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Dense *result) + const matrix::Hybrid* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto 
num_cols = source->get_size()[1]; @@ -93,8 +93,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_csr(std::shared_ptr exec, - const matrix::Hybrid *source, - matrix::Csr *result) + const matrix::Hybrid* source, + matrix::Csr* result) { auto csr_val = result->get_values(); auto csr_col_idxs = result->get_col_idxs(); @@ -137,8 +137,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Hybrid *source, - size_type *result) + const matrix::Hybrid* source, + size_type* result) { size_type ell_nnz = 0; size_type coo_nnz = 0; diff --git a/reference/matrix/sellp_kernels.cpp b/reference/matrix/sellp_kernels.cpp index 094d29a0ec6..d8c3d05b400 100644 --- a/reference/matrix/sellp_kernels.cpp +++ b/reference/matrix/sellp_kernels.cpp @@ -52,8 +52,8 @@ namespace sellp { template void spmv(std::shared_ptr exec, - const matrix::Sellp *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::Sellp* a, + const matrix::Dense* b, matrix::Dense* c) { auto col_idxs = a->get_const_col_idxs(); auto slice_lengths = a->get_const_slice_lengths(); @@ -85,11 +85,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::Sellp *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::Sellp* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { auto vals = a->get_const_values(); auto col_idxs = a->get_const_col_idxs(); @@ -125,8 +125,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void convert_to_dense(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Dense *result) + const matrix::Sellp* source, + matrix::Dense* result) { auto num_rows = source->get_size()[0]; auto num_cols = source->get_size()[1]; @@ -161,8 +161,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( 
template void convert_to_csr(std::shared_ptr exec, - const matrix::Sellp *source, - matrix::Csr *result) + const matrix::Sellp* source, + matrix::Csr* result) { auto num_rows = source->get_size()[0]; auto slice_size = source->get_slice_size(); @@ -207,8 +207,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void count_nonzeros(std::shared_ptr exec, - const matrix::Sellp *source, - size_type *result) + const matrix::Sellp* source, + size_type* result) { auto num_rows = source->get_size()[0]; auto slice_size = source->get_slice_size(); @@ -241,8 +241,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void extract_diagonal(std::shared_ptr exec, - const matrix::Sellp *orig, - matrix::Diagonal *diag) + const matrix::Sellp* orig, + matrix::Diagonal* diag) { const auto diag_size = diag->get_size()[0]; const auto slice_size = orig->get_slice_size(); diff --git a/reference/matrix/sparsity_csr_kernels.cpp b/reference/matrix/sparsity_csr_kernels.cpp index f70af45a18d..a1a96f72a56 100644 --- a/reference/matrix/sparsity_csr_kernels.cpp +++ b/reference/matrix/sparsity_csr_kernels.cpp @@ -60,8 +60,8 @@ namespace sparsity_csr { template void spmv(std::shared_ptr exec, - const matrix::SparsityCsr *a, - const matrix::Dense *b, matrix::Dense *c) + const matrix::SparsityCsr* a, + const matrix::Dense* b, matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -87,11 +87,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void advanced_spmv(std::shared_ptr exec, - const matrix::Dense *alpha, - const matrix::SparsityCsr *a, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *c) + const matrix::Dense* alpha, + const matrix::SparsityCsr* a, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* c) { auto row_ptrs = a->get_const_row_ptrs(); auto col_idxs = a->get_const_col_idxs(); @@ -120,8 +120,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void 
count_num_diagonal_elements( std::shared_ptr exec, - const matrix::SparsityCsr *matrix, - size_type *num_diagonal_elements) + const matrix::SparsityCsr* matrix, + size_type* num_diagonal_elements) { auto num_rows = matrix->get_size()[0]; auto row_ptrs = matrix->get_const_row_ptrs(); @@ -143,9 +143,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void remove_diagonal_elements(std::shared_ptr exec, - const IndexType *row_ptrs, - const IndexType *col_idxs, - matrix::SparsityCsr *matrix) + const IndexType* row_ptrs, + const IndexType* col_idxs, + matrix::SparsityCsr* matrix) { auto num_rows = matrix->get_size()[0]; auto adj_ptrs = matrix->get_row_ptrs(); @@ -177,9 +177,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template inline void convert_sparsity_to_csc(size_type num_rows, - const IndexType *row_ptrs, - const IndexType *col_idxs, - IndexType *row_idxs, IndexType *col_ptrs) + const IndexType* row_ptrs, + const IndexType* col_idxs, + IndexType* row_idxs, IndexType* col_ptrs) { for (size_type row = 0; row < num_rows; ++row) { for (auto i = row_ptrs[row]; i < row_ptrs[row + 1]; ++i) { @@ -193,8 +193,8 @@ inline void convert_sparsity_to_csc(size_type num_rows, template void transpose_and_transform( std::shared_ptr exec, - const matrix::SparsityCsr *orig, - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, + matrix::SparsityCsr* trans) { auto trans_row_ptrs = trans->get_row_ptrs(); auto orig_row_ptrs = orig->get_const_row_ptrs(); @@ -216,8 +216,8 @@ void transpose_and_transform( template void transpose(std::shared_ptr exec, - const matrix::SparsityCsr *orig, - matrix::SparsityCsr *trans) + const matrix::SparsityCsr* orig, + matrix::SparsityCsr* trans) { transpose_and_transform(exec, orig, trans); } @@ -228,7 +228,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void sort_by_column_index(std::shared_ptr exec, - matrix::SparsityCsr *to_sort) + matrix::SparsityCsr* to_sort) { auto row_ptrs = to_sort->get_row_ptrs(); auto col_idxs 
= to_sort->get_col_idxs(); @@ -247,7 +247,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( std::shared_ptr exec, - const matrix::SparsityCsr *to_check, bool *is_sorted) + const matrix::SparsityCsr* to_check, bool* is_sorted) { const auto row_ptrs = to_check->get_const_row_ptrs(); const auto col_idxs = to_check->get_const_col_idxs(); diff --git a/reference/multigrid/amgx_pgm_kernels.cpp b/reference/multigrid/amgx_pgm_kernels.cpp index bd696164c4f..7a1b3b5358b 100644 --- a/reference/multigrid/amgx_pgm_kernels.cpp +++ b/reference/multigrid/amgx_pgm_kernels.cpp @@ -64,8 +64,8 @@ namespace amgx_pgm { template void match_edge(std::shared_ptr exec, - const Array &strongest_neighbor, - Array &agg) + const Array& strongest_neighbor, + Array& agg) { auto agg_vals = agg.get_data(); auto strongest_neighbor_vals = strongest_neighbor.get_const_data(); @@ -88,7 +88,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_MATCH_EDGE_KERNEL); template void count_unagg(std::shared_ptr exec, - const Array &agg, IndexType *num_unagg) + const Array& agg, IndexType* num_unagg) { IndexType unagg = 0; for (size_type i = 0; i < agg.get_num_elems(); i++) { @@ -102,7 +102,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_COUNT_UNAGG_KERNEL); template void renumber(std::shared_ptr exec, - Array &agg, IndexType *num_agg) + Array& agg, IndexType* num_agg) { const auto num = agg.get_num_elems(); Array agg_map(exec, num + 1); @@ -127,9 +127,9 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_AMGX_PGM_RENUMBER_KERNEL); template void find_strongest_neighbor( std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, Array &agg, - Array &strongest_neighbor) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, Array& agg, + Array& strongest_neighbor) { const auto row_ptrs = weight_mtx->get_const_row_ptrs(); const auto col_idxs = weight_mtx->get_const_col_idxs(); @@ -181,10 +181,10 @@ 
GKO_INSTANTIATE_FOR_EACH_NON_COMPLEX_VALUE_AND_INDEX_TYPE( template void assign_to_exist_agg(std::shared_ptr exec, - const matrix::Csr *weight_mtx, - const matrix::Diagonal *diag, - Array &agg, - Array &intermediate_agg) + const matrix::Csr* weight_mtx, + const matrix::Diagonal* diag, + Array& agg, + Array& intermediate_agg) { const auto row_ptrs = weight_mtx->get_const_row_ptrs(); const auto col_idxs = weight_mtx->get_const_col_idxs(); diff --git a/reference/preconditioner/isai_kernels.cpp b/reference/preconditioner/isai_kernels.cpp index 2b91b54cd07..8b3daac0cfe 100644 --- a/reference/preconditioner/isai_kernels.cpp +++ b/reference/preconditioner/isai_kernels.cpp @@ -58,8 +58,8 @@ namespace isai { template -void forall_matching(const IndexType *fst, IndexType fst_size, - const IndexType *snd, IndexType snd_size, Callback cb) +void forall_matching(const IndexType* fst, IndexType fst_size, + const IndexType* snd, IndexType snd_size, Callback cb) { IndexType fst_idx{}; IndexType snd_idx{}; @@ -78,9 +78,9 @@ void forall_matching(const IndexType *fst, IndexType fst_size, template void generic_generate(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, Callable direct_solve, bool tri) { /* @@ -190,14 +190,14 @@ void generic_generate(std::shared_ptr exec, template void generate_tri_inverse(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, IndexType *excess_nz_ptrs, + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, IndexType* excess_nz_ptrs, bool lower) { auto trs_solve = [lower](const range> trisystem, - ValueType *rhs, const IndexType) { + ValueType* rhs, const IndexType) { const IndexType size = trisystem.length(0); if (size <= 0) { return; @@ -240,7 +240,7 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -inline IndexType choose_pivot(IndexType block_size, const ValueType *block, +inline IndexType choose_pivot(IndexType block_size, const ValueType* block, size_type stride) { IndexType cp = 0; @@ -255,7 +255,7 @@ inline IndexType choose_pivot(IndexType block_size, const ValueType *block, template inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, - ValueType *block, size_type stride) + ValueType* block, size_type stride) { using std::swap; for (IndexType i = 0; i < block_size; ++i) { @@ -266,15 +266,15 @@ inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, template void generate_general_inverse(std::shared_ptr exec, - const matrix::Csr *mtx, - matrix::Csr *inverse_mtx, - IndexType *excess_rhs_ptrs, - IndexType *excess_nz_ptrs, bool spd) + const matrix::Csr* mtx, + matrix::Csr* inverse_mtx, + IndexType* excess_rhs_ptrs, + IndexType* excess_nz_ptrs, bool spd) { using std::swap; auto general_solve = [spd](const range> transposed_system_range, - ValueType *rhs, const IndexType rhs_one_idx) { + ValueType* rhs, const IndexType rhs_one_idx) { const IndexType size = transposed_system_range.length(0); if (size <= 0) { return; @@ -336,12 +336,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void generate_excess_system(std::shared_ptr, - const matrix::Csr *input, - const matrix::Csr *inverse, - const IndexType *excess_rhs_ptrs, - const IndexType *excess_nz_ptrs, - matrix::Csr *excess_system, - matrix::Dense *excess_rhs, + const matrix::Csr* input, + const matrix::Csr* inverse, + const IndexType* excess_rhs_ptrs, + const IndexType* excess_nz_ptrs, + matrix::Csr* excess_system, + matrix::Dense* excess_rhs, size_type e_start, size_type e_end) { const auto num_rows = input->get_size()[0]; @@ -399,8 +399,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scale_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - matrix::Dense 
*excess_solution, + const IndexType* excess_block_ptrs, + matrix::Dense* excess_solution, size_type e_start, size_type e_end) { auto excess_values = excess_solution->get_values(); @@ -427,9 +427,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scatter_excess_solution(std::shared_ptr, - const IndexType *excess_block_ptrs, - const matrix::Dense *excess_solution, - matrix::Csr *inverse, + const IndexType* excess_block_ptrs, + const matrix::Dense* excess_solution, + matrix::Csr* inverse, size_type e_start, size_type e_end) { auto excess_values = excess_solution->get_const_values(); diff --git a/reference/preconditioner/jacobi_kernels.cpp b/reference/preconditioner/jacobi_kernels.cpp index e8abd8b8958..6101083c0a0 100644 --- a/reference/preconditioner/jacobi_kernels.cpp +++ b/reference/preconditioner/jacobi_kernels.cpp @@ -64,9 +64,9 @@ namespace { template -inline bool has_same_nonzero_pattern(const IndexType *prev_row_ptr, - const IndexType *curr_row_ptr, - const IndexType *next_row_ptr) +inline bool has_same_nonzero_pattern(const IndexType* prev_row_ptr, + const IndexType* curr_row_ptr, + const IndexType* next_row_ptr) { return std::distance(curr_row_ptr, next_row_ptr) == std::distance(prev_row_ptr, curr_row_ptr) && @@ -75,8 +75,8 @@ inline bool has_same_nonzero_pattern(const IndexType *prev_row_ptr, template -size_type find_natural_blocks(const matrix::Csr *mtx, - uint32 max_block_size, IndexType *block_ptrs) +size_type find_natural_blocks(const matrix::Csr* mtx, + uint32 max_block_size, IndexType* block_ptrs) { const auto rows = mtx->get_size()[0]; const auto row_ptrs = mtx->get_const_row_ptrs(); @@ -110,7 +110,7 @@ size_type find_natural_blocks(const matrix::Csr *mtx, template inline size_type agglomerate_supervariables(uint32 max_block_size, size_type num_natural_blocks, - IndexType *block_ptrs) + IndexType* block_ptrs) { if (num_natural_blocks == 0) { return 0; @@ -137,9 +137,9 @@ inline size_type agglomerate_supervariables(uint32 
max_block_size, template void find_blocks(std::shared_ptr exec, - const matrix::Csr *system_matrix, - uint32 max_block_size, size_type &num_blocks, - Array &block_pointers) + const matrix::Csr* system_matrix, + uint32 max_block_size, size_type& num_blocks, + Array& block_pointers) { num_blocks = find_natural_blocks(system_matrix, max_block_size, block_pointers.get_data()); @@ -155,9 +155,9 @@ namespace { template -inline void extract_block(const matrix::Csr *mtx, +inline void extract_block(const matrix::Csr* mtx, IndexType block_size, IndexType block_start, - ValueType *block, size_type stride) + ValueType* block, size_type stride) { for (int i = 0; i < block_size; ++i) { for (int j = 0; j < block_size; ++j) { @@ -181,7 +181,7 @@ inline void extract_block(const matrix::Csr *mtx, template -inline IndexType choose_pivot(IndexType block_size, const ValueType *block, +inline IndexType choose_pivot(IndexType block_size, const ValueType* block, size_type stride) { IndexType cp = 0; @@ -196,7 +196,7 @@ inline IndexType choose_pivot(IndexType block_size, const ValueType *block, template inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, - ValueType *block, size_type stride) + ValueType* block, size_type stride) { using std::swap; for (IndexType i = 0; i < block_size; ++i) { @@ -207,7 +207,7 @@ inline void swap_rows(IndexType row1, IndexType row2, IndexType block_size, template inline bool apply_gauss_jordan_transform(IndexType row, IndexType col, - IndexType block_size, ValueType *block, + IndexType block_size, ValueType* block, size_type stride) { const auto d = block[row * stride + col]; @@ -236,8 +236,8 @@ template > -inline void transpose_block(IndexType block_size, const SourceValueType *from, - size_type from_stride, ResultValueType *to, +inline void transpose_block(IndexType block_size, const SourceValueType* from, + size_type from_stride, ResultValueType* to, size_type to_stride, ValueConverter converter = {}) noexcept { @@ -254,8 +254,8 @@ 
template > inline void conj_transpose_block(IndexType block_size, - const SourceValueType *from, - size_type from_stride, ResultValueType *to, + const SourceValueType* from, + size_type from_stride, ResultValueType* to, size_type to_stride, ValueConverter converter = {}) noexcept { @@ -272,10 +272,10 @@ template > inline void permute_and_transpose_block(IndexType block_size, - const IndexType *col_perm, - const SourceValueType *source, + const IndexType* col_perm, + const SourceValueType* source, size_type source_stride, - ResultValueType *result, + ResultValueType* result, size_type result_stride, ValueConverter converter = {}) { @@ -289,8 +289,8 @@ inline void permute_and_transpose_block(IndexType block_size, template -inline bool invert_block(IndexType block_size, IndexType *perm, - ValueType *block, size_type stride) +inline bool invert_block(IndexType block_size, IndexType* perm, + ValueType* block, size_type stride) { using std::swap; for (IndexType k = 0; k < block_size; ++k) { @@ -311,7 +311,7 @@ inline bool invert_block(IndexType block_size, IndexType *perm, template inline bool validate_precision_reduction_feasibility( std::shared_ptr exec, IndexType block_size, - const ValueType *block, size_type stride) + const ValueType* block, size_type stride) { using gko::detail::float_traits; vector tmp(block_size * block_size, {}, exec); @@ -341,14 +341,14 @@ inline bool validate_precision_reduction_feasibility( template void generate(std::shared_ptr exec, - const matrix::Csr *system_matrix, + const matrix::Csr* system_matrix, size_type num_blocks, uint32 max_block_size, remove_complex accuracy, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array> &conditioning, - Array &block_precisions, - const Array &block_pointers, Array &blocks) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array>& conditioning, + Array& block_precisions, + const Array& block_pointers, Array& blocks) { const auto ptrs = 
block_pointers.get_const_data(); const auto prec = block_precisions.get_data(); @@ -426,7 +426,7 @@ void generate(std::shared_ptr exec, permute_and_transpose_block( block_size, perm[b].get_data(), block[b].get_data(), block_size, - reinterpret_cast( + reinterpret_cast( blocks.get_data() + storage_scheme.get_group_offset(g + b)) + storage_scheme.get_block_offset(g + b), @@ -446,9 +446,9 @@ template < typename ValueType, typename BlockValueType, typename ValueConverter = default_converter> inline void apply_block(size_type block_size, size_type num_rhs, - const BlockValueType *block, size_type stride, - ValueType alpha, const ValueType *b, size_type stride_b, - ValueType beta, ValueType *x, size_type stride_x, + const BlockValueType* block, size_type stride, + ValueType alpha, const ValueType* b, size_type stride_b, + ValueType beta, ValueType* x, size_type stride_x, ValueConverter converter = {}) { if (beta != zero()) { @@ -481,8 +481,8 @@ inline void apply_block(size_type block_size, size_type num_rhs, void initialize_precisions(std::shared_ptr exec, - const Array &source, - Array &precisions) + const Array& source, + Array& precisions) { const auto source_size = source.get_num_elems(); for (auto i = 0u; i < precisions.get_num_elems(); ++i) { @@ -494,14 +494,14 @@ void initialize_precisions(std::shared_ptr exec, template void apply(std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, - const Array &blocks, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, + const Array& blocks, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, matrix::Dense* x) { const auto ptrs = 
block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -515,7 +515,7 @@ void apply(std::shared_ptr exec, size_type num_blocks, GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, apply_block(block_size, b->get_size()[1], - reinterpret_cast(group) + + reinterpret_cast(group) + storage_scheme.get_block_offset(i), storage_scheme.get_stride(), alpha->at(0, 0), block_b, b->get_stride(), beta->at(0, 0), block_x, @@ -530,11 +530,11 @@ template void simple_apply( std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const matrix::Dense *b, matrix::Dense *x) + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const matrix::Dense* b, matrix::Dense* x) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -548,7 +548,7 @@ void simple_apply( GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, apply_block(block_size, b->get_size()[1], - reinterpret_cast(group) + + reinterpret_cast(group) + storage_scheme.get_block_offset(i), storage_scheme.get_stride(), one(), block_b, b->get_stride(), zero(), block_x, @@ -562,11 +562,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *alpha, - const matrix::Dense *b, - const matrix::Dense *beta, - matrix::Dense *x) + const Array& diag, + const matrix::Dense* alpha, + const matrix::Dense* b, + const matrix::Dense* beta, + matrix::Dense* x) { for (size_type i = 0; i < x->get_size()[0]; ++i) { for (size_type j = 0; j < x->get_size()[1]; ++j) { @@ -581,9 +581,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_APPLY_KERNEL); template void 
simple_scalar_apply(std::shared_ptr exec, - const Array &diag, - const matrix::Dense *b, - matrix::Dense *x) + const Array& diag, + const matrix::Dense* b, + matrix::Dense* x) { for (size_type i = 0; i < x->get_size()[0]; ++i) { for (size_type j = 0; j < x->get_size()[1]; ++j) { @@ -598,7 +598,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void scalar_conj(std::shared_ptr exec, - const Array &diag, Array &conj_diag) + const Array& diag, Array& conj_diag) { for (size_type i = 0; i < diag.get_num_elems(); ++i) { conj_diag.get_data()[i] = conj(diag.get_const_data()[i]); @@ -610,7 +610,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_SCALAR_CONJ_KERNEL); template void invert_diagonal(std::shared_ptr exec, - const Array &diag, Array &inv_diag) + const Array& diag, Array& inv_diag) { for (size_type i = 0; i < diag.get_num_elems(); ++i) { auto diag_val = diag.get_const_data()[i] == zero() @@ -626,11 +626,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_JACOBI_INVERT_DIAGONAL_KERNEL); template void transpose_jacobi( std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -648,9 +648,9 @@ void transpose_jacobi( ValueType, p, transpose_block( block_size, - reinterpret_cast(group) + block_ofs, + reinterpret_cast(group) + block_ofs, block_stride, - reinterpret_cast(out_group) + block_ofs, + reinterpret_cast(out_group) + block_ofs, block_stride)); } } @@ -662,11 +662,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void conj_transpose_jacobi( 
std::shared_ptr exec, size_type num_blocks, - uint32 max_block_size, const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - Array &out_blocks) + uint32 max_block_size, const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + Array& out_blocks) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -684,9 +684,9 @@ void conj_transpose_jacobi( ValueType, p, conj_transpose_block( block_size, - reinterpret_cast(group) + block_ofs, + reinterpret_cast(group) + block_ofs, block_stride, - reinterpret_cast(out_group) + block_ofs, + reinterpret_cast(out_group) + block_ofs, block_stride)); } } @@ -697,8 +697,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void scalar_convert_to_dense(std::shared_ptr exec, - const Array &blocks, - matrix::Dense *result) + const Array& blocks, + matrix::Dense* result) { auto matrix_size = result->get_size(); for (size_type i = 0; i < matrix_size[0]; ++i) { @@ -718,11 +718,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( template void convert_to_dense( std::shared_ptr exec, size_type num_blocks, - const Array &block_precisions, - const Array &block_pointers, const Array &blocks, - const preconditioner::block_interleaved_storage_scheme - &storage_scheme, - ValueType *result_values, size_type result_stride) + const Array& block_precisions, + const Array& block_pointers, const Array& blocks, + const preconditioner::block_interleaved_storage_scheme& + storage_scheme, + ValueType* result_values, size_type result_stride) { const auto ptrs = block_pointers.get_const_data(); const auto prec = block_precisions.get_const_data(); @@ -740,13 +740,12 @@ void convert_to_dense( const auto p = prec ? 
prec[i] : precision_reduction(); GKO_PRECONDITIONER_JACOBI_RESOLVE_PRECISION( ValueType, p, - transpose_block( - block_size, - reinterpret_cast(group) + - storage_scheme.get_block_offset(i), - storage_scheme.get_stride(), - result_values + ptrs[i] * result_stride + ptrs[i], - result_stride)); + transpose_block(block_size, + reinterpret_cast(group) + + storage_scheme.get_block_offset(i), + storage_scheme.get_stride(), + result_values + ptrs[i] * result_stride + ptrs[i], + result_stride)); } } diff --git a/reference/reorder/rcm_kernels.cpp b/reference/reorder/rcm_kernels.cpp index 06142e7435b..b0002353e38 100644 --- a/reference/reorder/rcm_kernels.cpp +++ b/reference/reorder/rcm_kernels.cpp @@ -68,8 +68,8 @@ namespace rcm { template void get_degree_of_nodes(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - IndexType *const degrees) + const IndexType* const row_ptrs, + IndexType* const degrees) { for (IndexType i = 0; i < num_vertices; ++i) { degrees[i] = row_ptrs[i + 1] - row_ptrs[i]; @@ -86,8 +86,8 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); template std::pair rls_contender_and_height( std::shared_ptr exec, const IndexType num_vertices, - const IndexType root, const IndexType *const row_ptrs, - const IndexType *const col_idxs, const IndexType *const degrees) + const IndexType root, const IndexType* const row_ptrs, + const IndexType* const col_idxs, const IndexType* const degrees) { // This could actually be allocated in the calling scope, then reused. 
vector visited_local(num_vertices, false, exec); @@ -159,10 +159,10 @@ std::pair rls_contender_and_height( template IndexType find_starting_node(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - const IndexType *const degrees, - const vector &visited, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const degrees, + const vector& visited, const gko::reorder::starting_strategy strategy) { using strategies = gko::reorder::starting_strategy; @@ -224,11 +224,11 @@ IndexType find_starting_node(std::shared_ptr exec, template void get_permutation(std::shared_ptr exec, const IndexType num_vertices, - const IndexType *const row_ptrs, - const IndexType *const col_idxs, - const IndexType *const degrees, - IndexType *const permutation, - IndexType *const inv_permutation, + const IndexType* const row_ptrs, + const IndexType* const col_idxs, + const IndexType* const degrees, + IndexType* const permutation, + IndexType* const inv_permutation, const gko::reorder::starting_strategy strategy) { // Storing vertices left to proceess. 
diff --git a/reference/solver/bicg_kernels.cpp b/reference/solver/bicg_kernels.cpp index 165b6bb4f5c..cb344925c46 100644 --- a/reference/solver/bicg_kernels.cpp +++ b/reference/solver/bicg_kernels.cpp @@ -52,13 +52,13 @@ namespace bicg { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *r2, - matrix::Dense *z2, matrix::Dense *p2, - matrix::Dense *q2, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* z, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* prev_rho, + matrix::Dense* rho, matrix::Dense* r2, + matrix::Dense* z2, matrix::Dense* p2, + matrix::Dense* q2, + Array* stop_status) { for (size_type j = 0; j < b->get_size()[1]; ++j) { rho->at(j) = zero(); @@ -80,11 +80,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - matrix::Dense *p2, const matrix::Dense *z2, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) + matrix::Dense* p, const matrix::Dense* z, + matrix::Dense* p2, const matrix::Dense* z2, + const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const Array* stop_status) { for (size_type i = 0; i < p->get_size()[0]; ++i) { for (size_type j = 0; j < p->get_size()[1]; ++j) { @@ -108,13 +108,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *r2, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *q2, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) + matrix::Dense* x, matrix::Dense* r, + matrix::Dense* r2, const matrix::Dense* p, + const matrix::Dense* q, + const matrix::Dense* q2, + const matrix::Dense* beta, + const 
matrix::Dense* rho, + const Array* stop_status) { for (size_type i = 0; i < x->get_size()[0]; ++i) { for (size_type j = 0; j < x->get_size()[1]; ++j) { diff --git a/reference/solver/bicgstab_kernels.cpp b/reference/solver/bicgstab_kernels.cpp index 4c6bc038344..cdc21296165 100644 --- a/reference/solver/bicgstab_kernels.cpp +++ b/reference/solver/bicgstab_kernels.cpp @@ -54,15 +54,15 @@ namespace bicgstab { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *rr, matrix::Dense *y, - matrix::Dense *s, matrix::Dense *t, - matrix::Dense *z, matrix::Dense *v, - matrix::Dense *p, matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *alpha, - matrix::Dense *beta, matrix::Dense *gamma, - matrix::Dense *omega, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* rr, matrix::Dense* y, + matrix::Dense* s, matrix::Dense* t, + matrix::Dense* z, matrix::Dense* v, + matrix::Dense* p, matrix::Dense* prev_rho, + matrix::Dense* rho, matrix::Dense* alpha, + matrix::Dense* beta, matrix::Dense* gamma, + matrix::Dense* omega, + Array* stop_status) { for (size_type j = 0; j < b->get_size()[1]; ++j) { rho->at(j) = one(); @@ -92,13 +92,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *p, - const matrix::Dense *v, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const matrix::Dense *alpha, - const matrix::Dense *omega, - const Array *stop_status) + const matrix::Dense* r, matrix::Dense* p, + const matrix::Dense* v, + const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const matrix::Dense* alpha, + const matrix::Dense* omega, + const Array* stop_status) { for (size_type i = 0; i < p->get_size()[0]; ++i) { for (size_type j = 0; j < p->get_size()[1]; ++j) { @@ -122,12 +122,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_1_KERNEL); template void 
step_2(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *s, - const matrix::Dense *v, - const matrix::Dense *rho, - matrix::Dense *alpha, - const matrix::Dense *beta, - const Array *stop_status) + const matrix::Dense* r, matrix::Dense* s, + const matrix::Dense* v, + const matrix::Dense* rho, + matrix::Dense* alpha, + const matrix::Dense* beta, + const Array* stop_status) { for (size_type i = 0; i < s->get_size()[0]; ++i) { for (size_type j = 0; j < s->get_size()[1]; ++j) { @@ -150,12 +150,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_2_KERNEL); template void step_3( - std::shared_ptr exec, matrix::Dense *x, - matrix::Dense *r, const matrix::Dense *s, - const matrix::Dense *t, const matrix::Dense *y, - const matrix::Dense *z, const matrix::Dense *alpha, - const matrix::Dense *beta, const matrix::Dense *gamma, - matrix::Dense *omega, const Array *stop_status) + std::shared_ptr exec, matrix::Dense* x, + matrix::Dense* r, const matrix::Dense* s, + const matrix::Dense* t, const matrix::Dense* y, + const matrix::Dense* z, const matrix::Dense* alpha, + const matrix::Dense* beta, const matrix::Dense* gamma, + matrix::Dense* omega, const Array* stop_status) { for (size_type j = 0; j < x->get_size()[1]; ++j) { if (stop_status->get_const_data()[j].has_stopped()) { @@ -184,9 +184,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BICGSTAB_STEP_3_KERNEL); template void finalize(std::shared_ptr exec, - matrix::Dense *x, const matrix::Dense *y, - const matrix::Dense *alpha, - Array *stop_status) + matrix::Dense* x, const matrix::Dense* y, + const matrix::Dense* alpha, + Array* stop_status) { for (size_type j = 0; j < x->get_size()[1]; ++j) { if (stop_status->get_const_data()[j].has_stopped() && diff --git a/reference/solver/cb_gmres_kernels.cpp b/reference/solver/cb_gmres_kernels.cpp index 6b232571b07..bf613e973f0 100644 --- a/reference/solver/cb_gmres_kernels.cpp +++ b/reference/solver/cb_gmres_kernels.cpp @@ -59,12 +59,12 @@ namespace { 
template -void finish_arnoldi_CGS(matrix::Dense *next_krylov_basis, +void finish_arnoldi_CGS(matrix::Dense* next_krylov_basis, Accessor3d krylov_bases, - matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, const stopping_status *stop_status) + matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, const stopping_status* stop_status) { static_assert( std::is_same *next_krylov_basis, template -void calculate_sin_and_cos(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, +void calculate_sin_and_cos(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, const size_type rhs) { if (hessenberg_iter->at(iter, rhs) == zero()) { @@ -201,10 +201,10 @@ void calculate_sin_and_cos(matrix::Dense *givens_sin, template -void givens_rotation(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) +void givens_rotation(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { if (stop_status[i].has_stopped()) { @@ -239,10 +239,10 @@ void givens_rotation(matrix::Dense *givens_sin, template void calculate_next_residual_norm( - matrix::Dense *givens_sin, matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, size_type iter, - const stopping_status *stop_status) + matrix::Dense* givens_sin, matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, size_type iter, + const stopping_status* stop_status) { for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { if (stop_status[i].has_stopped()) { @@ -261,9 +261,9 @@ void calculate_next_residual_norm( 
template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const size_type *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const size_type* final_iter_nums) { for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { for (int64 i = final_iter_nums[k] - 1; i >= 0; --i) { @@ -284,9 +284,9 @@ void solve_upper_triangular( template void calculate_qy(ConstAccessor3d krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const size_type *final_iter_nums) + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const size_type* final_iter_nums) { static_assert( std::is_same< @@ -310,11 +310,11 @@ void calculate_qy(ConstAccessor3d krylov_bases, template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { for (size_type j = 0; j < b->get_size()[1]; ++j) { for (size_type i = 0; i < b->get_size()[0]; ++i) { @@ -333,13 +333,13 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense> *arnoldi_norm, + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense>* arnoldi_norm, Accessor3d krylov_bases, - matrix::Dense *next_krylov_basis, - Array *final_iter_nums, size_type krylov_dim) + matrix::Dense* next_krylov_basis, + Array* final_iter_nums, size_type krylov_dim) { static_assert( std::is_same void 
step_1(std::shared_ptr exec, - matrix::Dense *next_krylov_basis, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - Accessor3d krylov_bases, matrix::Dense *hessenberg_iter, - matrix::Dense *buffer_iter, - matrix::Dense> *arnoldi_norm, - size_type iter, Array *final_iter_nums, - const Array *stop_status, Array *, - Array *) + matrix::Dense* next_krylov_basis, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + Accessor3d krylov_bases, matrix::Dense* hessenberg_iter, + matrix::Dense* buffer_iter, + matrix::Dense>* arnoldi_norm, + size_type iter, Array* final_iter_nums, + const Array* stop_status, Array*, + Array*) { static_assert( std::is_same void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, + const matrix::Dense* residual_norm_collection, ConstAccessor3d krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, final_iter_nums->get_const_data()); diff --git a/reference/solver/cg_kernels.cpp b/reference/solver/cg_kernels.cpp index 4202d41a2e3..534b70a4a2c 100644 --- a/reference/solver/cg_kernels.cpp +++ b/reference/solver/cg_kernels.cpp @@ -52,11 +52,11 @@ namespace cg { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *prev_rho, - matrix::Dense *rho, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* z, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* prev_rho, + matrix::Dense* rho, + Array* stop_status) { for (size_type j = 0; j < b->get_size()[1]; 
++j) { rho->at(j) = zero(); @@ -76,10 +76,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho, - const matrix::Dense *prev_rho, - const Array *stop_status) + matrix::Dense* p, const matrix::Dense* z, + const matrix::Dense* rho, + const matrix::Dense* prev_rho, + const Array* stop_status) { for (size_type i = 0; i < p->get_size()[0]; ++i) { for (size_type j = 0; j < p->get_size()[1]; ++j) { @@ -101,12 +101,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) + matrix::Dense* x, matrix::Dense* r, + const matrix::Dense* p, + const matrix::Dense* q, + const matrix::Dense* beta, + const matrix::Dense* rho, + const Array* stop_status) { for (size_type i = 0; i < x->get_size()[0]; ++i) { for (size_type j = 0; j < x->get_size()[1]; ++j) { diff --git a/reference/solver/cgs_kernels.cpp b/reference/solver/cgs_kernels.cpp index c2cfb0f0cda..92f30b55850 100644 --- a/reference/solver/cgs_kernels.cpp +++ b/reference/solver/cgs_kernels.cpp @@ -51,16 +51,16 @@ namespace cgs { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *r_tld, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *u, - matrix::Dense *u_hat, - matrix::Dense *v_hat, matrix::Dense *t, - matrix::Dense *alpha, matrix::Dense *beta, - matrix::Dense *gamma, - matrix::Dense *rho_prev, - matrix::Dense *rho, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* r_tld, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* u, + matrix::Dense* u_hat, + matrix::Dense* v_hat, matrix::Dense* t, + matrix::Dense* alpha, matrix::Dense* beta, + matrix::Dense* gamma, + 
matrix::Dense* rho_prev, + matrix::Dense* rho, + Array* stop_status) { for (size_type j = 0; j < b->get_size()[1]; ++j) { rho->at(j) = zero(); @@ -85,11 +85,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - const matrix::Dense *r, matrix::Dense *u, - matrix::Dense *p, const matrix::Dense *q, - matrix::Dense *beta, const matrix::Dense *rho, - const matrix::Dense *rho_prev, - const Array *stop_status) + const matrix::Dense* r, matrix::Dense* u, + matrix::Dense* p, const matrix::Dense* q, + matrix::Dense* beta, const matrix::Dense* rho, + const matrix::Dense* rho_prev, + const Array* stop_status) { for (size_type j = 0; j < p->get_size()[1]; ++j) { if (stop_status->get_const_data()[j].has_stopped()) { @@ -117,12 +117,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense *u, - const matrix::Dense *v_hat, matrix::Dense *q, - matrix::Dense *t, matrix::Dense *alpha, - const matrix::Dense *rho, - const matrix::Dense *gamma, - const Array *stop_status) + const matrix::Dense* u, + const matrix::Dense* v_hat, matrix::Dense* q, + matrix::Dense* t, matrix::Dense* alpha, + const matrix::Dense* rho, + const matrix::Dense* gamma, + const Array* stop_status) { for (size_type j = 0; j < u->get_size()[1]; ++j) { if (stop_status->get_const_data()[j].has_stopped()) { @@ -148,10 +148,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CGS_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, - const matrix::Dense *t, - const matrix::Dense *u_hat, matrix::Dense *r, - matrix::Dense *x, const matrix::Dense *alpha, - const Array *stop_status) + const matrix::Dense* t, + const matrix::Dense* u_hat, matrix::Dense* r, + matrix::Dense* x, const matrix::Dense* alpha, + const Array* stop_status) { for (size_type i = 0; i < x->get_size()[0]; ++i) { for (size_type j = 0; j < x->get_size()[1]; ++j) { diff --git 
a/reference/solver/fcg_kernels.cpp b/reference/solver/fcg_kernels.cpp index b8af3445e5d..cfc8c09642c 100644 --- a/reference/solver/fcg_kernels.cpp +++ b/reference/solver/fcg_kernels.cpp @@ -51,12 +51,12 @@ namespace fcg { template void initialize(std::shared_ptr exec, - const matrix::Dense *b, matrix::Dense *r, - matrix::Dense *z, matrix::Dense *p, - matrix::Dense *q, matrix::Dense *t, - matrix::Dense *prev_rho, - matrix::Dense *rho, matrix::Dense *rho_t, - Array *stop_status) + const matrix::Dense* b, matrix::Dense* r, + matrix::Dense* z, matrix::Dense* p, + matrix::Dense* q, matrix::Dense* t, + matrix::Dense* prev_rho, + matrix::Dense* rho, matrix::Dense* rho_t, + Array* stop_status) { for (size_type j = 0; j < b->get_size()[1]; ++j) { rho->at(j) = zero(); @@ -77,10 +77,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, - matrix::Dense *p, const matrix::Dense *z, - const matrix::Dense *rho_t, - const matrix::Dense *prev_rho, - const Array *stop_status) + matrix::Dense* p, const matrix::Dense* z, + const matrix::Dense* rho_t, + const matrix::Dense* prev_rho, + const Array* stop_status) { for (size_type i = 0; i < p->get_size()[0]; ++i) { for (size_type j = 0; j < p->get_size()[1]; ++j) { @@ -102,12 +102,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_FCG_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - matrix::Dense *x, matrix::Dense *r, - matrix::Dense *t, const matrix::Dense *p, - const matrix::Dense *q, - const matrix::Dense *beta, - const matrix::Dense *rho, - const Array *stop_status) + matrix::Dense* x, matrix::Dense* r, + matrix::Dense* t, const matrix::Dense* p, + const matrix::Dense* q, + const matrix::Dense* beta, + const matrix::Dense* rho, + const Array* stop_status) { for (size_type i = 0; i < x->get_size()[0]; ++i) { for (size_type j = 0; j < x->get_size()[1]; ++j) { diff --git a/reference/solver/gmres_kernels.cpp b/reference/solver/gmres_kernels.cpp index 
e70cb8e0dc7..bbe268a4dab 100644 --- a/reference/solver/gmres_kernels.cpp +++ b/reference/solver/gmres_kernels.cpp @@ -55,9 +55,9 @@ namespace { template -void finish_arnoldi(size_type num_rows, matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) +void finish_arnoldi(size_type num_rows, matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { const auto krylov_bases_rowoffset = num_rows; const auto next_krylov_rowoffset = (iter + 1) * krylov_bases_rowoffset; @@ -103,9 +103,9 @@ void finish_arnoldi(size_type num_rows, matrix::Dense *krylov_bases, template -void calculate_sin_and_cos(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, +void calculate_sin_and_cos(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, const size_type rhs) { if (hessenberg_iter->at(iter, rhs) == zero()) { @@ -125,10 +125,10 @@ void calculate_sin_and_cos(matrix::Dense *givens_sin, template -void givens_rotation(matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense *hessenberg_iter, size_type iter, - const stopping_status *stop_status) +void givens_rotation(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) { for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { if (stop_status[i].has_stopped()) { @@ -163,10 +163,10 @@ void givens_rotation(matrix::Dense *givens_sin, template void calculate_next_residual_norm( - matrix::Dense *givens_sin, matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, size_type iter, - const stopping_status *stop_status) + matrix::Dense* givens_sin, matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, size_type iter, + const 
stopping_status* stop_status) { for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { if (stop_status[i].has_stopped()) { @@ -185,9 +185,9 @@ void calculate_next_residual_norm( template void solve_upper_triangular( - const matrix::Dense *residual_norm_collection, - const matrix::Dense *hessenberg, matrix::Dense *y, - const size_type *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, matrix::Dense* y, + const size_type* final_iter_nums) { for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { for (int i = final_iter_nums[k] - 1; i >= 0; --i) { @@ -207,10 +207,10 @@ void solve_upper_triangular( template -void calculate_qy(const matrix::Dense *krylov_bases, - const matrix::Dense *y, - matrix::Dense *before_preconditioner, - const size_type *final_iter_nums) +void calculate_qy(const matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const size_type* final_iter_nums) { const auto krylov_bases_rowoffset = before_preconditioner->get_size()[0]; for (size_type k = 0; k < before_preconditioner->get_size()[1]; ++k) { @@ -231,11 +231,11 @@ void calculate_qy(const matrix::Dense *krylov_bases, template void initialize_1(std::shared_ptr exec, - const matrix::Dense *b, - matrix::Dense *residual, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - Array *stop_status, size_type krylov_dim) + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + Array* stop_status, size_type krylov_dim) { using NormValueType = remove_complex; for (size_type j = 0; j < b->get_size()[1]; ++j) { @@ -255,11 +255,11 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_1_KERNEL); template void initialize_2(std::shared_ptr exec, - const matrix::Dense *residual, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - Array *final_iter_nums, size_type 
krylov_dim) + const matrix::Dense* residual, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + Array* final_iter_nums, size_type krylov_dim) { for (size_type j = 0; j < residual->get_size()[1]; ++j) { // Calculate residual norm @@ -282,14 +282,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_2_KERNEL); template void step_1(std::shared_ptr exec, size_type num_rows, - matrix::Dense *givens_sin, - matrix::Dense *givens_cos, - matrix::Dense> *residual_norm, - matrix::Dense *residual_norm_collection, - matrix::Dense *krylov_bases, - matrix::Dense *hessenberg_iter, size_type iter, - Array *final_iter_nums, - const Array *stop_status) + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* krylov_bases, + matrix::Dense* hessenberg_iter, size_type iter, + Array* final_iter_nums, + const Array* stop_status) { for (size_type i = 0; i < final_iter_nums->get_num_elems(); ++i) { final_iter_nums->get_data()[i] += @@ -310,12 +310,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, - const matrix::Dense *residual_norm_collection, - const matrix::Dense *krylov_bases, - const matrix::Dense *hessenberg, - matrix::Dense *y, - matrix::Dense *before_preconditioner, - const Array *final_iter_nums) + const matrix::Dense* residual_norm_collection, + const matrix::Dense* krylov_bases, + const matrix::Dense* hessenberg, + matrix::Dense* y, + matrix::Dense* before_preconditioner, + const Array* final_iter_nums) { solve_upper_triangular(residual_norm_collection, hessenberg, y, final_iter_nums->get_const_data()); diff --git a/reference/solver/idr_kernels.cpp b/reference/solver/idr_kernels.cpp index 18a152cbf91..8bb544676b6 100644 --- a/reference/solver/idr_kernels.cpp +++ b/reference/solver/idr_kernels.cpp @@ -59,10 +59,10 @@ namespace { template void 
solve_lower_triangular(const size_type nrhs, - const matrix::Dense *m, - const matrix::Dense *f, - matrix::Dense *c, - const Array *stop_status) + const matrix::Dense* m, + const matrix::Dense* f, + matrix::Dense* c, + const Array* stop_status) { for (size_type i = 0; i < f->get_size()[1]; i++) { if (stop_status->get_const_data()[i].has_stopped()) { @@ -82,11 +82,11 @@ void solve_lower_triangular(const size_type nrhs, template void update_g_and_u(const size_type nrhs, const size_type k, - const matrix::Dense *p, - const matrix::Dense *m, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, - const Array *stop_status) + const matrix::Dense* p, + const matrix::Dense* m, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, + const Array* stop_status) { for (size_type i = 0; i < nrhs; i++) { if (stop_status->get_const_data()[i].has_stopped()) { @@ -114,7 +114,7 @@ void update_g_and_u(const size_type nrhs, const size_type k, template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(Distribution&& dist, Generator&& gen) { return dist(gen); } @@ -122,7 +122,7 @@ get_rand_value(Distribution &&dist, Generator &&gen) template typename std::enable_if::value, ValueType>::type -get_rand_value(Distribution &&dist, Generator &&gen) +get_rand_value(Distribution&& dist, Generator&& gen) { return ValueType(dist(gen), dist(gen)); } @@ -133,9 +133,9 @@ get_rand_value(Distribution &&dist, Generator &&gen) template void initialize(std::shared_ptr exec, - const size_type nrhs, matrix::Dense *m, - matrix::Dense *subspace_vectors, bool deterministic, - Array *stop_status) + const size_type nrhs, matrix::Dense* m, + matrix::Dense* subspace_vectors, bool deterministic, + Array* stop_status) { // Initialize M for (size_type i = 0; i < nrhs; i++) { @@ -191,12 +191,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template void step_1(std::shared_ptr exec, const size_type nrhs, - 
const size_type k, const matrix::Dense *m, - const matrix::Dense *f, - const matrix::Dense *residual, - const matrix::Dense *g, matrix::Dense *c, - matrix::Dense *v, - const Array *stop_status) + const size_type k, const matrix::Dense* m, + const matrix::Dense* f, + const matrix::Dense* residual, + const matrix::Dense* g, matrix::Dense* c, + matrix::Dense* v, + const Array* stop_status) { // Compute c = M \ f solve_lower_triangular(nrhs, m, f, c, stop_status); @@ -221,10 +221,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template void step_2(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *omega, - const matrix::Dense *preconditioned_vector, - const matrix::Dense *c, matrix::Dense *u, - const Array *stop_status) + const size_type k, const matrix::Dense* omega, + const matrix::Dense* preconditioned_vector, + const matrix::Dense* c, matrix::Dense* u, + const Array* stop_status) { for (size_type i = 0; i < nrhs; i++) { if (stop_status->get_const_data()[i].has_stopped()) { @@ -246,12 +246,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template void step_3(std::shared_ptr exec, const size_type nrhs, - const size_type k, const matrix::Dense *p, - matrix::Dense *g, matrix::Dense *g_k, - matrix::Dense *u, matrix::Dense *m, - matrix::Dense *f, matrix::Dense *, - matrix::Dense *residual, matrix::Dense *x, - const Array *stop_status) + const size_type k, const matrix::Dense* p, + matrix::Dense* g, matrix::Dense* g_k, + matrix::Dense* u, matrix::Dense* m, + matrix::Dense* f, matrix::Dense*, + matrix::Dense* residual, matrix::Dense* x, + const Array* stop_status) { update_g_and_u(nrhs, k, p, m, g, g_k, u, stop_status); @@ -290,9 +290,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( std::shared_ptr exec, const size_type nrhs, - const remove_complex kappa, const matrix::Dense *tht, - const matrix::Dense> *residual_norm, - matrix::Dense 
*omega, const Array *stop_status) + const remove_complex kappa, const matrix::Dense* tht, + const matrix::Dense>* residual_norm, + matrix::Dense* omega, const Array* stop_status) { for (size_type i = 0; i < nrhs; i++) { if (stop_status->get_const_data()[i].has_stopped()) { diff --git a/reference/solver/ir_kernels.cpp b/reference/solver/ir_kernels.cpp index b3f4b85bc7f..de9585baba1 100644 --- a/reference/solver/ir_kernels.cpp +++ b/reference/solver/ir_kernels.cpp @@ -45,7 +45,7 @@ namespace ir { void initialize(std::shared_ptr exec, - Array *stop_status) + Array* stop_status) { for (size_type j = 0; j < stop_status->get_num_elems(); ++j) { stop_status->get_data()[j].reset(); diff --git a/reference/solver/lower_trs_kernels.cpp b/reference/solver/lower_trs_kernels.cpp index 8247397c4ba..aca478c77da 100644 --- a/reference/solver/lower_trs_kernels.cpp +++ b/reference/solver/lower_trs_kernels.cpp @@ -56,14 +56,14 @@ namespace lower_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { do_transpose = false; } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { // This init kernel is here to allow initialization of the solve struct for // a more sophisticated implementation as for other executors. @@ -72,8 +72,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { // This generate kernel is here to allow for a more sophisticated // implementation as for other executors. 
This kernel would perform the @@ -91,10 +91,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); diff --git a/reference/solver/upper_trs_kernels.cpp b/reference/solver/upper_trs_kernels.cpp index 7c938d505ab..46dbbd423a7 100644 --- a/reference/solver/upper_trs_kernels.cpp +++ b/reference/solver/upper_trs_kernels.cpp @@ -56,14 +56,14 @@ namespace upper_trs { void should_perform_transpose(std::shared_ptr exec, - bool &do_transpose) + bool& do_transpose) { do_transpose = false; } void init_struct(std::shared_ptr exec, - std::shared_ptr &solve_struct) + std::shared_ptr& solve_struct) { // This init kernel is here to allow initialization of the solve struct for // a more sophisticated implementation as for other executors. @@ -72,8 +72,8 @@ void init_struct(std::shared_ptr exec, template void generate(std::shared_ptr exec, - const matrix::Csr *matrix, - solver::SolveStruct *solve_struct, const gko::size_type num_rhs) + const matrix::Csr* matrix, + solver::SolveStruct* solve_struct, const gko::size_type num_rhs) { // This generate kernel is here to allow for a more sophisticated // implementation as for other executors. 
This kernel would perform the @@ -91,10 +91,10 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( */ template void solve(std::shared_ptr exec, - const matrix::Csr *matrix, - const solver::SolveStruct *solve_struct, - matrix::Dense *trans_b, matrix::Dense *trans_x, - const matrix::Dense *b, matrix::Dense *x) + const matrix::Csr* matrix, + const solver::SolveStruct* solve_struct, + matrix::Dense* trans_b, matrix::Dense* trans_x, + const matrix::Dense* b, matrix::Dense* x) { auto row_ptrs = matrix->get_const_row_ptrs(); auto col_idxs = matrix->get_const_col_idxs(); diff --git a/reference/stop/criterion_kernels.cpp b/reference/stop/criterion_kernels.cpp index e54f5f96f9f..ed17ad2ad41 100644 --- a/reference/stop/criterion_kernels.cpp +++ b/reference/stop/criterion_kernels.cpp @@ -49,7 +49,7 @@ namespace set_all_statuses { void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, - Array *stop_status) + Array* stop_status) { for (int i = 0; i < stop_status->get_num_elems(); i++) { stop_status->get_data()[i].stop(stoppingId, setFinalized); diff --git a/reference/stop/residual_norm_kernels.cpp b/reference/stop/residual_norm_kernels.cpp index b1f25b52c29..3ac4eea6428 100644 --- a/reference/stop/residual_norm_kernels.cpp +++ b/reference/stop/residual_norm_kernels.cpp @@ -55,12 +55,12 @@ namespace residual_norm { template void residual_norm(std::shared_ptr exec, - const matrix::Dense *tau, - const matrix::Dense *orig_tau, + const matrix::Dense* tau, + const matrix::Dense* orig_tau, ValueType rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, - bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, + bool* one_changed) { static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); @@ -98,11 +98,11 @@ namespace implicit_residual_norm { template void implicit_residual_norm( std::shared_ptr exec, - 
const matrix::Dense *tau, - const matrix::Dense> *orig_tau, + const matrix::Dense* tau, + const matrix::Dense>* orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, - bool setFinalized, Array *stop_status, - Array *device_storage, bool *all_converged, bool *one_changed) + bool setFinalized, Array* stop_status, + Array* device_storage, bool* all_converged, bool* one_changed) { *all_converged = true; *one_changed = false; diff --git a/reference/test/base/composition.cpp b/reference/test/base/composition.cpp index 5c964f94d49..5e501fb6dd4 100644 --- a/reference/test/base/composition.cpp +++ b/reference/test/base/composition.cpp @@ -60,10 +60,10 @@ class DummyLinOp : public gko::EnableLinOp>, bool apply_uses_initial_guess() const override { return true; } protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} explicit DummyLinOp(std::shared_ptr exec) diff --git a/reference/test/base/utils.cpp b/reference/test/base/utils.cpp index 4a904cc3f5a..ea1cdeb4b51 100644 --- a/reference/test/base/utils.cpp +++ b/reference/test/base/utils.cpp @@ -165,7 +165,7 @@ TEST_F(ConvertToWithSorting, DontSortWithRawPtr) TEST_F(ConvertToWithSorting, SortWithConstRawPtr) { - const Coo *cptr = unsorted_coo.get(); + const Coo* cptr = unsorted_coo.get(); auto result = gko::convert_to_with_sorting(ref, cptr, false); diff --git a/reference/test/factorization/ic_kernels.cpp b/reference/test/factorization/ic_kernels.cpp index a1ffc505e2f..8525d76b467 100644 --- a/reference/test/factorization/ic_kernels.cpp +++ b/reference/test/factorization/ic_kernels.cpp @@ -62,10 +62,10 @@ class DummyLinOp : public gko::EnableLinOp, {} protected: - void 
apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; diff --git a/reference/test/factorization/ilu_kernels.cpp b/reference/test/factorization/ilu_kernels.cpp index 94321f71829..8c291554ff7 100644 --- a/reference/test/factorization/ilu_kernels.cpp +++ b/reference/test/factorization/ilu_kernels.cpp @@ -63,10 +63,10 @@ class DummyLinOp : public gko::EnableLinOp, {} protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; @@ -269,9 +269,9 @@ TYPED_TEST(Ilu, LUFactorFunctionsSetProperly) auto factors = this->ilu_factory_skip->generate(this->mtx_small); auto lin_op_l_factor = - static_cast(gko::lend(factors->get_l_factor())); + static_cast(gko::lend(factors->get_l_factor())); auto lin_op_u_factor = - static_cast(gko::lend(factors->get_u_factor())); + static_cast(gko::lend(factors->get_u_factor())); auto first_operator = gko::lend(factors->get_operators()[0]); auto second_operator = gko::lend(factors->get_operators()[1]); diff --git a/reference/test/factorization/par_ic_kernels.cpp b/reference/test/factorization/par_ic_kernels.cpp index 6211b7e0a59..c3e33fa57d2 100644 --- a/reference/test/factorization/par_ic_kernels.cpp +++ b/reference/test/factorization/par_ic_kernels.cpp @@ -64,10 +64,10 @@ class DummyLinOp : public gko::EnableLinOp, {} protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const 
override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; diff --git a/reference/test/factorization/par_ict_kernels.cpp b/reference/test/factorization/par_ict_kernels.cpp index 5f0fc25c021..ada357eed10 100644 --- a/reference/test/factorization/par_ict_kernels.cpp +++ b/reference/test/factorization/par_ict_kernels.cpp @@ -64,10 +64,10 @@ class DummyLinOp : public gko::EnableLinOp, {} protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; @@ -284,9 +284,9 @@ TYPED_TEST(ParIct, IsConsistentWithComposition) auto fact = this->fact_fact->generate(this->mtx_system); auto lin_op_l_factor = - static_cast(gko::lend(fact->get_l_factor())); + static_cast(gko::lend(fact->get_l_factor())); auto lin_op_lt_factor = - static_cast(gko::lend(fact->get_lt_factor())); + static_cast(gko::lend(fact->get_lt_factor())); auto first_operator = gko::lend(fact->get_operators()[0]); auto second_operator = gko::lend(fact->get_operators()[1]); diff --git a/reference/test/factorization/par_ilu_kernels.cpp b/reference/test/factorization/par_ilu_kernels.cpp index 2869631cadd..cd68a6e91df 100644 --- a/reference/test/factorization/par_ilu_kernels.cpp +++ b/reference/test/factorization/par_ilu_kernels.cpp @@ -65,10 +65,10 @@ class DummyLinOp : public gko::EnableLinOp, {} protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, 
gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; @@ -434,7 +434,7 @@ TYPED_TEST(ParIlu, KernelComputeLU) // The expected result of U also needs to be transposed auto u_expected_lin_op = this->small_u_expected->transpose(); auto u_expected = std::unique_ptr( - static_cast(u_expected_lin_op.release())); + static_cast(u_expected_lin_op.release())); gko::kernels::reference::par_ilu_factorization::compute_l_u_factors( this->ref, iterations, gko::lend(mtx_coo), gko::lend(l_csr), @@ -510,9 +510,9 @@ TYPED_TEST(ParIlu, LUFactorFunctionsSetProperly) auto factors = this->ilu_factory_skip->generate(this->mtx_small); auto lin_op_l_factor = - static_cast(gko::lend(factors->get_l_factor())); + static_cast(gko::lend(factors->get_l_factor())); auto lin_op_u_factor = - static_cast(gko::lend(factors->get_u_factor())); + static_cast(gko::lend(factors->get_u_factor())); auto first_operator = gko::lend(factors->get_operators()[0]); auto second_operator = gko::lend(factors->get_operators()[1]); diff --git a/reference/test/factorization/par_ilut_kernels.cpp b/reference/test/factorization/par_ilut_kernels.cpp index 33dd875ab92..192b0ae9a7e 100644 --- a/reference/test/factorization/par_ilut_kernels.cpp +++ b/reference/test/factorization/par_ilut_kernels.cpp @@ -63,10 +63,10 @@ class DummyLinOp : public gko::EnableLinOp, {} protected: - void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override {} + void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override {} - void apply_impl(const gko::LinOp *alpha, const gko::LinOp *b, - const gko::LinOp *beta, gko::LinOp *x) const override + void apply_impl(const gko::LinOp* alpha, const gko::LinOp* b, + const gko::LinOp* beta, gko::LinOp* x) const override {} }; @@ -201,7 +201,7 @@ class ParIlut : public 
::testing::Test { {} template - void test_select(const std::unique_ptr &mtx, index_type rank, + void test_select(const std::unique_ptr& mtx, index_type rank, gko::remove_complex expected, gko::remove_complex tolerance = 0.0) { @@ -221,9 +221,9 @@ class ParIlut : public ::testing::Test { template > - void test_filter(const std::unique_ptr &mtx, + void test_filter(const std::unique_ptr& mtx, gko::remove_complex threshold, - const std::unique_ptr &expected, bool lower) + const std::unique_ptr& expected, bool lower) { auto res_mtx = Mtx::create(exec, mtx->get_size()); auto res_mtx_coo = Coo::create(exec, mtx->get_size()); @@ -245,8 +245,8 @@ class ParIlut : public ::testing::Test { template > - void test_filter_approx(const std::unique_ptr &mtx, index_type rank, - const std::unique_ptr &expected) + void test_filter_approx(const std::unique_ptr& mtx, index_type rank, + const std::unique_ptr& expected) { auto res_mtx = Mtx::create(exec, mtx->get_size()); auto res_mtx_coo = Coo::create(exec, mtx->get_size()); @@ -347,7 +347,7 @@ TYPED_TEST(ParIlut, KernelThresholdFilterNullptrCoo) using Csr = typename TestFixture::Csr; using Coo = typename TestFixture::Coo; auto res_mtx = Csr::create(this->exec, this->mtx1->get_size()); - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; gko::kernels::reference::par_ilut_factorization::threshold_filter( this->ref, this->mtx1.get(), 0.0, res_mtx.get(), null_coo, true); @@ -428,7 +428,7 @@ TYPED_TEST(ParIlut, KernelThresholdFilterApproxNullptrCoo) auto res_mtx = Csr::create(this->exec, this->mtx1->get_size()); auto tmp = gko::Array{this->ref}; gko::remove_complex threshold{}; - Coo *null_coo = nullptr; + Coo* null_coo = nullptr; index_type rank{}; gko::kernels::reference::par_ilut_factorization::threshold_filter_approx( @@ -558,9 +558,9 @@ TYPED_TEST(ParIlut, IsConsistentWithComposition) auto fact = this->fact_fact->generate(this->mtx_system); auto lin_op_l_factor = - static_cast(gko::lend(fact->get_l_factor())); + 
static_cast(gko::lend(fact->get_l_factor())); auto lin_op_u_factor = - static_cast(gko::lend(fact->get_u_factor())); + static_cast(gko::lend(fact->get_u_factor())); auto first_operator = gko::lend(fact->get_operators()[0]); auto second_operator = gko::lend(fact->get_operators()[1]); diff --git a/reference/test/log/papi.cpp b/reference/test/log/papi.cpp index fd84c507125..4e06af2f51e 100644 --- a/reference/test/log/papi.cpp +++ b/reference/test/log/papi.cpp @@ -70,8 +70,8 @@ class Papi : public ::testing::Test { void TearDown() { eventset = PAPI_NULL; } template - const std::string init(const gko::log::Logger::mask_type &event, - const std::string &event_name, U *ptr) + const std::string init(const gko::log::Logger::mask_type& event, + const std::string& event_name, U* ptr) { logger = gko::log::Papi::create(exec, event); std::ostringstream os; @@ -80,7 +80,7 @@ class Papi : public ::testing::Test { return os.str(); } - void add_event(const std::string &event_name) + void add_event(const std::string& event_name) { int code; int ret_val = PAPI_event_name_to_code(event_name.c_str(), &code); @@ -102,7 +102,7 @@ class Papi : public ::testing::Test { } } - void stop(long long int *values) + void stop(long long int* values) { int ret_val = PAPI_stop(eventset, values); if (PAPI_OK != ret_val) { @@ -137,7 +137,7 @@ TYPED_TEST(Papi, CatchesCriterionCheckCompleted) nullptr, false, false); long long int values[2]; this->stop(values); - double *sde_ptr = GET_SDE_RECORDER_ADDRESS(values[1], double); + double* sde_ptr = GET_SDE_RECORDER_ADDRESS(values[1], double); ASSERT_EQ(values[0], 1); ASSERT_EQ(sde_ptr[0], 4.0); diff --git a/reference/test/matrix/coo_kernels.cpp b/reference/test/matrix/coo_kernels.cpp index b975214dcbf..35c3dc76271 100644 --- a/reference/test/matrix/coo_kernels.cpp +++ b/reference/test/matrix/coo_kernels.cpp @@ -79,7 +79,7 @@ class Coo : public ::testing::Test { std::swap(vals[0], vals[1]); } - void assert_equal_to_mtx_in_csr_format(const Csr *m) + void 
assert_equal_to_mtx_in_csr_format(const Csr* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); diff --git a/reference/test/matrix/csr_kernels.cpp b/reference/test/matrix/csr_kernels.cpp index 162ec21654b..1303210b593 100644 --- a/reference/test/matrix/csr_kernels.cpp +++ b/reference/test/matrix/csr_kernels.cpp @@ -93,12 +93,12 @@ class Csr : public ::testing::Test { this->create_mtx3(mtx3_sorted.get(), mtx3_unsorted.get()); } - void create_mtx(Mtx *m) + void create_mtx(Mtx* m) { - value_type *v = m->get_values(); - index_type *c = m->get_col_idxs(); - index_type *r = m->get_row_ptrs(); - auto *s = m->get_srow(); + value_type* v = m->get_values(); + index_type* c = m->get_col_idxs(); + index_type* r = m->get_row_ptrs(); + auto* s = m->get_srow(); /* * 1 3 2 * 0 5 0 @@ -117,11 +117,11 @@ class Csr : public ::testing::Test { s[0] = 0; } - void create_mtx2(Mtx *m) + void create_mtx2(Mtx* m) { - value_type *v = m->get_values(); - index_type *c = m->get_col_idxs(); - index_type *r = m->get_row_ptrs(); + value_type* v = m->get_values(); + index_type* c = m->get_col_idxs(); + index_type* r = m->get_row_ptrs(); // It keeps an explict zero /* * 1 3 2 @@ -142,7 +142,7 @@ class Csr : public ::testing::Test { v[4] = 5.0; } - void create_mtx3(Mtx *sorted, Mtx *unsorted) + void create_mtx3(Mtx* sorted, Mtx* unsorted) { auto vals_s = sorted->get_values(); auto cols_s = sorted->get_col_idxs(); @@ -199,7 +199,7 @@ class Csr : public ::testing::Test { cols_u[6] = 0; } - void assert_equal_to_mtx(const Coo *m) + void assert_equal_to_mtx(const Coo* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -221,7 +221,7 @@ class Csr : public ::testing::Test { EXPECT_EQ(v[3], value_type{5.0}); } - void assert_equal_to_mtx(const Sellp *m) + void assert_equal_to_mtx(const Sellp* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -249,10 +249,10 @@ class Csr : public ::testing::Test { EXPECT_EQ(v[129], value_type{0.0}); } - void 
assert_equal_to_mtx(const SparsityCsr *m) + void assert_equal_to_mtx(const SparsityCsr* m) { - auto *c = m->get_const_col_idxs(); - auto *r = m->get_const_row_ptrs(); + auto* c = m->get_const_col_idxs(); + auto* r = m->get_const_row_ptrs(); ASSERT_EQ(m->get_size(), gko::dim<2>(2, 3)); ASSERT_EQ(m->get_num_nonzeros(), 4); @@ -265,7 +265,7 @@ class Csr : public ::testing::Test { EXPECT_EQ(c[3], 1); } - void assert_equal_to_mtx(const Ell *m) + void assert_equal_to_mtx(const Ell* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); @@ -286,7 +286,7 @@ class Csr : public ::testing::Test { EXPECT_EQ(v[5], value_type{0.0}); } - void assert_equal_to_mtx(const Hybrid *m) + void assert_equal_to_mtx(const Hybrid* m) { auto v = m->get_const_coo_values(); auto c = m->get_const_coo_col_idxs(); @@ -313,7 +313,7 @@ class Csr : public ::testing::Test { EXPECT_EQ(v[3], value_type{5.0}); } - void assert_equal_to_mtx2(const Hybrid *m) + void assert_equal_to_mtx2(const Hybrid* m) { auto v = m->get_const_coo_values(); auto c = m->get_const_coo_col_idxs(); diff --git a/reference/test/matrix/dense_kernels.cpp b/reference/test/matrix/dense_kernels.cpp index 992bce8194b..4e9124fa8e2 100644 --- a/reference/test/matrix/dense_kernels.cpp +++ b/reference/test/matrix/dense_kernels.cpp @@ -4036,7 +4036,7 @@ TYPED_TEST(Dense, MakeTemporaryConversionConstDoesntConvertBack) { auto conversion = gko::make_temporary_conversion( - static_cast(alpha.get())); + static_cast(alpha.get())); alpha->at(0, 0) = MixedT{7.0}; } diff --git a/reference/test/matrix/diagonal_kernels.cpp b/reference/test/matrix/diagonal_kernels.cpp index 73c86376a68..fe9b648603f 100644 --- a/reference/test/matrix/diagonal_kernels.cpp +++ b/reference/test/matrix/diagonal_kernels.cpp @@ -81,16 +81,16 @@ class Diagonal : public ::testing::Test { this->create_diag2(diag2.get()); } - void create_diag1(Diag *d) + void create_diag1(Diag* d) { - auto *v = d->get_values(); + auto* v = d->get_values(); v[0] = 2.0; v[1] = 3.0; 
} - void create_diag2(Diag *d) + void create_diag2(Diag* d) { - auto *v = d->get_values(); + auto* v = d->get_values(); v[0] = 2.0; v[1] = 3.0; v[2] = 4.0; @@ -649,7 +649,7 @@ TYPED_TEST(DiagonalComplex, MtxIsConjugateTransposable) diag_values[2] = value_type{0.0, 1.5}; auto trans = diag->conj_transpose(); - auto trans_as_diagonal = static_cast(trans.get()); + auto trans_as_diagonal = static_cast(trans.get()); auto trans_values = trans_as_diagonal->get_values(); EXPECT_EQ(trans->get_size(), gko::dim<2>(3)); diff --git a/reference/test/matrix/ell_kernels.cpp b/reference/test/matrix/ell_kernels.cpp index c3326737e4f..fa18b120284 100644 --- a/reference/test/matrix/ell_kernels.cpp +++ b/reference/test/matrix/ell_kernels.cpp @@ -79,7 +79,7 @@ class Ell : public ::testing::Test { // clang-format on } - void assert_equal_to_mtx(const Csr *m) + void assert_equal_to_mtx(const Csr* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); diff --git a/reference/test/matrix/fbcsr_kernels.cpp b/reference/test/matrix/fbcsr_kernels.cpp index 96f376fa68c..f8076e82dfc 100644 --- a/reference/test/matrix/fbcsr_kernels.cpp +++ b/reference/test/matrix/fbcsr_kernels.cpp @@ -89,7 +89,7 @@ class Fbcsr : public ::testing::Test { mtxsq(fbsamplesquare.generate_fbcsr()) {} - void assert_equal_to_mtx(const Csr *const m) + void assert_equal_to_mtx(const Csr* const m) { ASSERT_EQ(m->get_size(), refcsrmtx->get_size()); ASSERT_EQ(m->get_num_stored_elements(), @@ -105,7 +105,7 @@ class Fbcsr : public ::testing::Test { } } - void assert_equal_to_mtx(const SparCsr *m) + void assert_equal_to_mtx(const SparCsr* m) { ASSERT_EQ(m->get_size(), refspcmtx->get_size()); ASSERT_EQ(m->get_num_nonzeros(), refspcmtx->get_num_nonzeros()); @@ -162,7 +162,7 @@ TYPED_TEST(Fbcsr, AppliesToDenseVector) const index_type nrows = this->mtx2->get_size()[0]; const index_type ncols = this->mtx2->get_size()[1]; auto x = Vec::create(this->exec, gko::dim<2>{(gko::size_type)ncols, 1}); - T *const xvals = 
x->get_values(); + T* const xvals = x->get_values(); for (index_type i = 0; i < ncols; i++) { xvals[i] = std::sin(static_cast(static_cast((i + 1) ^ 2))); } @@ -568,9 +568,9 @@ TYPED_TEST(Fbcsr, SquareMtxIsTransposable) this->mtxsq->convert_to(csrmtxsq.get()); std::unique_ptr reftmtx = csrmtxsq->transpose(); - auto reftmtx_as_csr = static_cast(reftmtx.get()); + auto reftmtx_as_csr = static_cast(reftmtx.get()); auto trans = this->mtxsq->transpose(); - auto trans_as_fbcsr = static_cast(trans.get()); + auto trans_as_fbcsr = static_cast(trans.get()); GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx_as_csr, 0.0); } @@ -585,9 +585,9 @@ TYPED_TEST(Fbcsr, NonSquareMtxIsTransposable) this->mtx2->convert_to(csrmtx.get()); std::unique_ptr reftmtx = csrmtx->transpose(); - auto reftmtx_as_csr = static_cast(reftmtx.get()); + auto reftmtx_as_csr = static_cast(reftmtx.get()); auto trans = this->mtx2->transpose(); - auto trans_as_fbcsr = static_cast(trans.get()); + auto trans_as_fbcsr = static_cast(trans.get()); GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftmtx_as_csr, 0.0); } @@ -604,7 +604,7 @@ TYPED_TEST(Fbcsr, RecognizeUnsortedMatrix) using Fbcsr = typename TestFixture::Mtx; using index_type = typename TestFixture::index_type; auto cpmat = this->mtx->clone(); - index_type *const colinds = cpmat->get_col_idxs(); + index_type* const colinds = cpmat->get_col_idxs(); std::swap(colinds[0], colinds[1]); ASSERT_FALSE(cpmat->is_sorted_by_column_index()); @@ -723,9 +723,9 @@ TYPED_TEST(FbcsrComplex, MtxIsConjugateTransposable) auto mtx = csample.generate_fbcsr(); auto reftranslinop = csrmtx->conj_transpose(); - auto reftrans = static_cast(reftranslinop.get()); + auto reftrans = static_cast(reftranslinop.get()); auto trans = mtx->conj_transpose(); - auto trans_as_fbcsr = static_cast(trans.get()); + auto trans_as_fbcsr = static_cast(trans.get()); GKO_ASSERT_MTX_NEAR(trans_as_fbcsr, reftrans, 0.0); } diff --git a/reference/test/matrix/hybrid_kernels.cpp b/reference/test/matrix/hybrid_kernels.cpp index 
800ef70b768..81f0bbc6507 100644 --- a/reference/test/matrix/hybrid_kernels.cpp +++ b/reference/test/matrix/hybrid_kernels.cpp @@ -104,7 +104,7 @@ class Hybrid : public ::testing::Test { coo_row[1] = 1; } - void assert_equal_to_mtx(const Csr *m) + void assert_equal_to_mtx(const Csr* m) { auto v = m->get_const_values(); auto c = m->get_const_col_idxs(); diff --git a/reference/test/matrix/sparsity_csr.cpp b/reference/test/matrix/sparsity_csr.cpp index e87eeacbc5a..023a308da94 100644 --- a/reference/test/matrix/sparsity_csr.cpp +++ b/reference/test/matrix/sparsity_csr.cpp @@ -64,8 +64,8 @@ class SparsityCsr : public ::testing::Test { : exec(gko::ReferenceExecutor::create()), mtx(Mtx::create(exec, gko::dim<2>{2, 3}, 4)) { - i_type *c = mtx->get_col_idxs(); - i_type *r = mtx->get_row_ptrs(); + i_type* c = mtx->get_col_idxs(); + i_type* r = mtx->get_row_ptrs(); r[0] = 0; r[1] = 3; r[2] = 4; diff --git a/reference/test/matrix/sparsity_csr_kernels.cpp b/reference/test/matrix/sparsity_csr_kernels.cpp index f0c3239271f..aadeeeb6835 100644 --- a/reference/test/matrix/sparsity_csr_kernels.cpp +++ b/reference/test/matrix/sparsity_csr_kernels.cpp @@ -75,10 +75,10 @@ class SparsityCsr : public ::testing::Test { this->create_mtx3(mtx3_sorted.get(), mtx3_unsorted.get()); } - void create_mtx(Mtx *m) + void create_mtx(Mtx* m) { - index_type *c = m->get_col_idxs(); - index_type *r = m->get_row_ptrs(); + index_type* c = m->get_col_idxs(); + index_type* r = m->get_row_ptrs(); /* * 1 1 1 * 0 1 0 @@ -92,10 +92,10 @@ class SparsityCsr : public ::testing::Test { c[3] = 1; } - void create_mtx2(Mtx *m) + void create_mtx2(Mtx* m) { - index_type *c = m->get_col_idxs(); - index_type *r = m->get_row_ptrs(); + index_type* c = m->get_col_idxs(); + index_type* r = m->get_row_ptrs(); // It keeps an explict zero /* * 1 1 1 @@ -111,7 +111,7 @@ class SparsityCsr : public ::testing::Test { c[4] = 1; } - void create_mtx3(Mtx *sorted, Mtx *unsorted) + void create_mtx3(Mtx* sorted, Mtx* unsorted) { auto 
cols_s = sorted->get_col_idxs(); auto rows_s = sorted->get_row_ptrs(); @@ -368,7 +368,7 @@ TYPED_TEST(SparsityCsr, SquareMtxIsTransposable) // clang-format on auto trans = mtx2->transpose(); - auto trans_as_sparsity = static_cast(trans.get()); + auto trans_as_sparsity = static_cast(trans.get()); // clang-format off GKO_ASSERT_MTX_NEAR(trans_as_sparsity, @@ -383,7 +383,7 @@ TYPED_TEST(SparsityCsr, NonSquareMtxIsTransposable) { using Mtx = typename TestFixture::Mtx; auto trans = this->mtx->transpose(); - auto trans_as_sparsity = static_cast(trans.get()); + auto trans_as_sparsity = static_cast(trans.get()); // clang-format off GKO_ASSERT_MTX_NEAR(trans_as_sparsity, diff --git a/reference/test/multigrid/amgx_pgm_kernels.cpp b/reference/test/multigrid/amgx_pgm_kernels.cpp index 89efb56c890..ae523c23e6c 100644 --- a/reference/test/multigrid/amgx_pgm_kernels.cpp +++ b/reference/test/multigrid/amgx_pgm_kernels.cpp @@ -111,8 +111,8 @@ class AmgxPgm : public ::testing::Test { mtx_diag = weight->extract_diagonal(); } - void create_mtx(Mtx *fine, WeightMtx *weight, gko::Array *agg, - Mtx *coarse) + void create_mtx(Mtx* fine, WeightMtx* weight, gko::Array* agg, + Mtx* coarse) { auto agg_val = agg->get_data(); agg_val[0] = 0; @@ -176,7 +176,7 @@ class AmgxPgm : public ::testing::Test { coarse->read({{2, 2}, {{0, 0, 6}, {0, 1, -5}, {1, 0, -4}, {1, 1, 5}}}); } - static void assert_same_matrices(const Mtx *m1, const Mtx *m2) + static void assert_same_matrices(const Mtx* m1, const Mtx* m2) { ASSERT_EQ(m1->get_size()[0], m2->get_size()[0]); ASSERT_EQ(m1->get_size()[1], m2->get_size()[1]); @@ -190,7 +190,7 @@ class AmgxPgm : public ::testing::Test { } } - static void assert_same_agg(const index_type *m1, const index_type *m2, + static void assert_same_agg(const index_type* m1, const index_type* m2, gko::size_type len) { for (gko::size_type i = 0; i < len; ++i) { @@ -228,11 +228,11 @@ TYPED_TEST(AmgxPgm, CanBeCopied) auto copy_agg = copy->get_const_agg(); auto copy_coarse = 
copy->get_coarse_op(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); this->assert_same_agg(copy_agg, this->agg.get_data(), this->agg.get_num_elems()); - this->assert_same_matrices(static_cast(copy_coarse.get()), + this->assert_same_matrices(static_cast(copy_coarse.get()), this->coarse.get()); } @@ -248,11 +248,11 @@ TYPED_TEST(AmgxPgm, CanBeMoved) auto copy_agg = copy->get_const_agg(); auto copy_coarse = copy->get_coarse_op(); - this->assert_same_matrices(static_cast(copy_mtx.get()), + this->assert_same_matrices(static_cast(copy_mtx.get()), this->mtx.get()); this->assert_same_agg(copy_agg, this->agg.get_data(), this->agg.get_num_elems()); - this->assert_same_matrices(static_cast(copy_coarse.get()), + this->assert_same_matrices(static_cast(copy_coarse.get()), this->coarse.get()); } @@ -266,11 +266,11 @@ TYPED_TEST(AmgxPgm, CanBeCloned) auto clone_agg = clone->get_const_agg(); auto clone_coarse = clone->get_coarse_op(); - this->assert_same_matrices(static_cast(clone_mtx.get()), + this->assert_same_matrices(static_cast(clone_mtx.get()), this->mtx.get()); this->assert_same_agg(clone_agg, this->agg.get_data(), this->agg.get_num_elems()); - this->assert_same_matrices(static_cast(clone_coarse.get()), + this->assert_same_matrices(static_cast(clone_coarse.get()), this->coarse.get()); } diff --git a/reference/test/preconditioner/isai_kernels.cpp b/reference/test/preconditioner/isai_kernels.cpp index c501f956cfc..bcff4d58efe 100644 --- a/reference/test/preconditioner/isai_kernels.cpp +++ b/reference/test/preconditioner/isai_kernels.cpp @@ -232,14 +232,14 @@ class Isai : public ::testing::Test { } template - std::unique_ptr read(const char *name) + std::unique_ptr read(const char* name) { std::ifstream mtxstream{std::string{gko::matrices::location_isai_mtxs} + name}; auto result = gko::read(mtxstream, exec); // to avoid removing 0s, the matrices store 12345 instead for (gko::size_type i = 
0; i < result->get_num_stored_elements(); ++i) { - auto &val = result->get_values()[i]; + auto& val = result->get_values()[i]; if (val == static_cast(12345.0)) { val = 0; } @@ -247,7 +247,7 @@ class Isai : public ::testing::Test { return std::move(result); } - std::unique_ptr clone_allocations(const Csr *csr_mtx) + std::unique_ptr clone_allocations(const Csr* csr_mtx) { const auto num_elems = csr_mtx->get_num_stored_elements(); auto sparsity = csr_mtx->clone(); @@ -257,7 +257,7 @@ class Isai : public ::testing::Test { return sparsity; } - std::unique_ptr transpose(const Csr *mtx) + std::unique_ptr transpose(const Csr* mtx) { return gko::as(mtx->transpose()); } diff --git a/reference/test/preconditioner/jacobi.cpp b/reference/test/preconditioner/jacobi.cpp index 330195cee6a..1f901f4e1b2 100644 --- a/reference/test/preconditioner/jacobi.cpp +++ b/reference/test/preconditioner/jacobi.cpp @@ -105,14 +105,14 @@ class Jacobi : public ::testing::Test { } template - void init_array(T *arr, std::initializer_list vals) + void init_array(T* arr, std::initializer_list vals) { std::copy(std::begin(vals), std::end(vals), arr); } template - void assert_same_block(gko::size_type block_size, const ValueType *ptr_a, - gko::size_type stride_a, const ValueType *ptr_b, + void assert_same_block(gko::size_type block_size, const ValueType* ptr_a, + gko::size_type stride_a, const ValueType* ptr_b, gko::size_type stride_b) { for (int i = 0; i < block_size; ++i) { @@ -124,7 +124,7 @@ class Jacobi : public ::testing::Test { } } - void assert_same_precond(const Bj *a, const Bj *b) + void assert_same_precond(const Bj* a, const Bj* b) { ASSERT_EQ(a->get_size()[0], b->get_size()[0]); ASSERT_EQ(a->get_size()[1], b->get_size()[1]); @@ -154,11 +154,11 @@ class Jacobi : public ::testing::Test { value_type, prec_a, assert_same_block( b_ptr_a[i + 1] - b_ptr_a[i], - reinterpret_cast( + reinterpret_cast( a->get_blocks() + scheme.get_group_offset(i)) + scheme.get_block_offset(i), scheme.get_stride(), - 
reinterpret_cast( + reinterpret_cast( a->get_blocks() + scheme.get_group_offset(i)) + scheme.get_block_offset(i), scheme.get_stride())); @@ -366,7 +366,7 @@ TYPED_TEST(Jacobi, ScalarJacobiCanBeTransposed) template -void init_array(T *arr, std::initializer_list vals) +void init_array(T* arr, std::initializer_list vals) { std::copy(std::begin(vals), std::end(vals), arr); } diff --git a/reference/test/preconditioner/jacobi_kernels.cpp b/reference/test/preconditioner/jacobi_kernels.cpp index 55192bf36c6..9f9c17fc6ad 100644 --- a/reference/test/preconditioner/jacobi_kernels.cpp +++ b/reference/test/preconditioner/jacobi_kernels.cpp @@ -103,7 +103,7 @@ class Jacobi : public ::testing::Test { } template - void init_array(T *arr, std::initializer_list vals) + void init_array(T* arr, std::initializer_list vals) { for (auto elem : vals) { *(arr++) = elem; @@ -309,7 +309,7 @@ TYPED_TEST(Jacobi, InvertsDiagonalBlocksWithAdaptivePrecision) const auto b_prec_bj = bj->get_parameters().storage_optimization.block_wise.get_const_data(); using reduced = ::gko::reduce_precision; - auto b1 = reinterpret_cast( + auto b1 = reinterpret_cast( bj->get_blocks() + scheme.get_global_block_offset(0)); GKO_EXPECT_NEAR(b1[0 + 0 * p], reduced{4.0 / 14.0}, half_tol); GKO_EXPECT_NEAR(b1[0 + 1 * p], reduced{2.0 / 14.0}, half_tol); @@ -369,7 +369,7 @@ TYPED_TEST(Jacobi, CanTransposeDiagonalBlocksWithAdaptivePrecision) auto scheme = bj->get_storage_scheme(); auto p = scheme.get_stride(); using reduced = ::gko::reduce_precision; - auto b1 = reinterpret_cast( + auto b1 = reinterpret_cast( bj->get_blocks() + scheme.get_global_block_offset(0)); GKO_EXPECT_NEAR(b1[0 + 0 * p], reduced{4.0 / 14.0}, half_tol); GKO_EXPECT_NEAR(b1[1 + 0 * p], reduced{2.0 / 14.0}, half_tol); @@ -428,7 +428,7 @@ TYPED_TEST(Jacobi, CanConjTransposeDiagonalBlocksWithAdaptivePrecision) auto scheme = bj->get_storage_scheme(); auto p = scheme.get_stride(); using reduced = ::gko::reduce_precision; - auto b1 = reinterpret_cast( + auto b1 
= reinterpret_cast( bj->get_blocks() + scheme.get_global_block_offset(0)); GKO_EXPECT_NEAR(b1[0 + 0 * p], reduced{4.0 / 14.0}, half_tol); GKO_EXPECT_NEAR(b1[1 + 0 * p], reduced{2.0 / 14.0}, half_tol); @@ -552,7 +552,7 @@ TYPED_TEST(Jacobi, PivotsWhenInvertingBlocksWithiAdaptivePrecision) auto scheme = bj->get_storage_scheme(); auto p = scheme.get_stride(); using reduced = ::gko::reduce_precision; - auto b1 = reinterpret_cast( + auto b1 = reinterpret_cast( bj->get_blocks() + scheme.get_global_block_offset(0)); GKO_EXPECT_NEAR(b1[0 + 0 * p], reduced{0.0 / 4.0}, half_tol); GKO_EXPECT_NEAR(b1[0 + 1 * p], reduced{0.0 / 4.0}, half_tol); diff --git a/reference/test/reorder/rcm_kernels.cpp b/reference/test/reorder/rcm_kernels.cpp index a5b2b3069a3..7ee7baca070 100644 --- a/reference/test/reorder/rcm_kernels.cpp +++ b/reference/test/reorder/rcm_kernels.cpp @@ -96,7 +96,7 @@ class Rcm : public ::testing::Test { std::shared_ptr p_mtx_1; std::unique_ptr reorder_op_1; - static bool is_permutation(const perm_type *input_perm) + static bool is_permutation(const perm_type* input_perm) { const auto perm_size = input_perm->get_permutation_size(); auto perm_sorted = std::vector(perm_size); diff --git a/reference/test/solver/bicg_kernels.cpp b/reference/test/solver/bicg_kernels.cpp index 1a5b98d1778..61001ece012 100644 --- a/reference/test/solver/bicg_kernels.cpp +++ b/reference/test/solver/bicg_kernels.cpp @@ -549,7 +549,7 @@ TYPED_TEST(Bicg, SolvesNonSymmetricStencilSystem) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/reference/test/solver/bicgstab_kernels.cpp b/reference/test/solver/bicgstab_kernels.cpp index 85b3a591378..c444444ca4a 100644 --- a/reference/test/solver/bicgstab_kernels.cpp +++ b/reference/test/solver/bicgstab_kernels.cpp @@ -677,7 +677,7 @@ TYPED_TEST(Bicgstab, 
SolvesBigDenseSystemForDivergenceCheck2) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/reference/test/solver/cb_gmres_kernels.cpp b/reference/test/solver/cb_gmres_kernels.cpp index 76f5d55e8b9..c544b118a8f 100644 --- a/reference/test/solver/cb_gmres_kernels.cpp +++ b/reference/test/solver/cb_gmres_kernels.cpp @@ -446,7 +446,7 @@ TYPED_TEST(CbGmres, SolvesBigDenseSystem2) template -gko::remove_complex inf_norm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex inf_norm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; auto host_data = clone(mat->get_executor()->get_master(), mat); diff --git a/reference/test/solver/cg_kernels.cpp b/reference/test/solver/cg_kernels.cpp index d5b296afe61..2d4ae540fb0 100644 --- a/reference/test/solver/cg_kernels.cpp +++ b/reference/test/solver/cg_kernels.cpp @@ -496,7 +496,7 @@ TYPED_TEST(Cg, SolvesBigDenseSystem3) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/reference/test/solver/cgs_kernels.cpp b/reference/test/solver/cgs_kernels.cpp index 74c80c8bfe7..9f73f4a67bc 100644 --- a/reference/test/solver/cgs_kernels.cpp +++ b/reference/test/solver/cgs_kernels.cpp @@ -559,7 +559,7 @@ TYPED_TEST(Cgs, SolvesBigDenseSystem2) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/reference/test/solver/fcg_kernels.cpp b/reference/test/solver/fcg_kernels.cpp index 5be80feb2ec..8e15966a9c6 100644 --- a/reference/test/solver/fcg_kernels.cpp +++ b/reference/test/solver/fcg_kernels.cpp @@ -509,7 +509,7 @@ TYPED_TEST(Fcg, 
SolvesBigDenseSystem2) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index f64523aaabb..b055eb10897 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -366,7 +366,7 @@ TYPED_TEST(Gmres, SolveWithImplicitResNormCritIsDisabled) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/reference/test/solver/idr_kernels.cpp b/reference/test/solver/idr_kernels.cpp index 59c76b8c5c6..e3287cbade3 100644 --- a/reference/test/solver/idr_kernels.cpp +++ b/reference/test/solver/idr_kernels.cpp @@ -420,7 +420,7 @@ TYPED_TEST(Idr, SolvesBigDenseSystemForDivergenceCheck2) template -gko::remove_complex infNorm(gko::matrix::Dense *mat, size_t col = 0) +gko::remove_complex infNorm(gko::matrix::Dense* mat, size_t col = 0) { using std::abs; using no_cpx_t = gko::remove_complex; diff --git a/test/matrix/dense_kernels.cpp b/test/matrix/dense_kernels.cpp index 1b7eb9d2b91..956653d1c8e 100644 --- a/test/matrix/dense_kernels.cpp +++ b/test/matrix/dense_kernels.cpp @@ -158,7 +158,7 @@ class Dense : public ::testing::Test { std::shuffle(tmp2.begin(), tmp2.end(), rng); std::vector tmp3(x->get_size()[0] / 10); std::uniform_int_distribution row_dist(0, x->get_size()[0] - 1); - for (auto &i : tmp3) { + for (auto& i : tmp3) { i = row_dist(rng); } rpermute_idxs = @@ -170,7 +170,7 @@ class Dense : public ::testing::Test { } template - std::unique_ptr convert(InputType &&input) + std::unique_ptr convert(InputType&& input) { auto result = ConvertedType::create(input->get_executor()); input->convert_to(result.get()); @@ -728,8 +728,8 @@ 
TEST_F(Dense, IsPermutable) auto permuted = square->permute(rpermute_idxs.get()); auto dpermuted = dsquare->permute(rpermute_idxs.get()); - GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), - static_cast(dpermuted.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); } @@ -755,8 +755,8 @@ TEST_F(Dense, IsInversePermutable) auto permuted = square->inverse_permute(rpermute_idxs.get()); auto dpermuted = dsquare->inverse_permute(rpermute_idxs.get()); - GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), - static_cast(dpermuted.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(permuted.get()), + static_cast(dpermuted.get()), 0); } @@ -782,8 +782,8 @@ TEST_F(Dense, IsRowPermutable) auto r_permute = x->row_permute(rpermute_idxs.get()); auto dr_permute = dx->row_permute(rpermute_idxs.get()); - GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), - static_cast(dr_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(r_permute.get()), + static_cast(dr_permute.get()), 0); } @@ -808,8 +808,8 @@ TEST_F(Dense, IsColPermutable) auto c_permute = x->column_permute(cpermute_idxs.get()); auto dc_permute = dx->column_permute(cpermute_idxs.get()); - GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), - static_cast(dc_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(c_permute.get()), + static_cast(dc_permute.get()), 0); } @@ -834,8 +834,8 @@ TEST_F(Dense, IsInverseRowPermutable) auto inverse_r_permute = x->inverse_row_permute(rpermute_idxs.get()); auto d_inverse_r_permute = dx->inverse_row_permute(rpermute_idxs.get()); - GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), - static_cast(d_inverse_r_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(inverse_r_permute.get()), + static_cast(d_inverse_r_permute.get()), 0); } @@ -860,8 +860,8 @@ TEST_F(Dense, IsInverseColPermutable) auto inverse_c_permute = x->inverse_column_permute(cpermute_idxs.get()); auto d_inverse_c_permute = dx->inverse_column_permute(cpermute_idxs.get()); - 
GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), - static_cast(d_inverse_c_permute.get()), 0); + GKO_ASSERT_MTX_NEAR(static_cast(inverse_c_permute.get()), + static_cast(d_inverse_c_permute.get()), 0); } diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index cd2ac76855d..3ea38fab2d7 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -46,8 +46,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -void assert_similar_matrices(const gko::matrix::Dense<> *m1, - const gko::matrix::Dense<> *m2, double prec) +void assert_similar_matrices(const gko::matrix::Dense<>* m1, + const gko::matrix::Dense<>* m2, double prec) { assert(m1->get_size()[0] == m2->get_size()[0]); assert(m1->get_size()[1] == m2->get_size()[1]); @@ -61,8 +61,8 @@ void assert_similar_matrices(const gko::matrix::Dense<> *m1, template void check_spmv(std::shared_ptr exec, - const gko::matrix_data &A_raw, - const gko::matrix::Dense<> *b, gko::matrix::Dense<> *x) + const gko::matrix_data& A_raw, + const gko::matrix::Dense<>* b, gko::matrix::Dense<>* x) { auto test = Mtx::create(exec); #if HAS_REFERENCE @@ -90,8 +90,8 @@ void check_spmv(std::shared_ptr exec, template void check_solver(std::shared_ptr exec, - const gko::matrix_data &A_raw, - const gko::matrix::Dense<> *b, gko::matrix::Dense<> *x) + const gko::matrix_data& A_raw, + const gko::matrix::Dense<>* b, gko::matrix::Dense<>* x) { using Mtx = gko::matrix::Csr<>; auto A = gko::share(Mtx::create(exec, std::make_shared())); diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 9cb4238aed6..05841e1d0da 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -43,14 +43,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void init_executor(std::shared_ptr ref, - std::shared_ptr &exec) + std::shared_ptr& exec) { exec = gko::OmpExecutor::create(); } void init_executor(std::shared_ptr ref, - std::shared_ptr &exec) + std::shared_ptr& exec) { ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); exec = gko::CudaExecutor::create(0, ref); @@ -58,7 +58,7 @@ void init_executor(std::shared_ptr ref, void init_executor(std::shared_ptr ref, - std::shared_ptr &exec) + std::shared_ptr& exec) { ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); exec = gko::HipExecutor::create(0, ref); @@ -66,7 +66,7 @@ void init_executor(std::shared_ptr ref, void init_executor(std::shared_ptr ref, - std::shared_ptr &exec) + std::shared_ptr& exec) { if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { exec = gko::DpcppExecutor::create(0, ref, "gpu");