Skip to content

Commit

Permalink
Update vir simd 0.4 (#404)
Browse files Browse the repository at this point in the history
* Fix qa_Converter when targeting AVX-512

Only convert simd types that are supported by the implementation.

* CMake: Improve CMAKE_BUILD_TYPE usage and docs

* Replace meta::simdize by vir::simdize; adjust implementation accordingly

* Bump to vir-simd v0.4.0
* Replace gr::meta::simdize with vir::simdize.
* Replace gr::meta::simdize_size_v with ::size() on simdized types.
* "Document" (via a commented-out example) that vir::transform can be used
in the same places where std::transform and std::ranges::transform are shown.

* Remove all offsets from processOne(_simd)

Remove:
- invokeProcessOneWithOrWithoutOffset
- exact_argument_type
- can_processOne_with_offset_invoke_test
- can_processOne_simd_with_offset
- can_processOne_scalar_with_offset
- can_processOne_with_offset

Modify MergedGraph apply_left, apply_right, processOne, and
processOne_simd to not require an offset argument anymore.

Modify all calls to processOne(_simd) to never pass an offset.

Modify TagSource::processOne (in TagMonitors.hpp) to call generateTag without
an offset. The tag is now unconditionally published at index 0.

* Fix non-const processOne signatures to not accept simd arguments

Either the function is const and can work with chunks of input, or it
is non-const and cannot work with chunks of input. These functions were
neither.

* Non-const processOne cannot be called in a SIMD-loop

* Make the nosonar_node_api benchmark non-SIMD only

With `sink::processOne` being non-const the merged graphs in this
benchmark are always non-SIMD. Consequently the processOne_simd calls
become ill-formed and need to be removed. Thus, there's no difference
anymore when compiling with DISABLE_SIMD and that extra benchmark
executable can go.

Signed-off-by: Matthias Kretz <m.kretz@gsi.de>
  • Loading branch information
mattkretz authored Sep 26, 2024
1 parent 1e41e38 commit 4bb2740
Show file tree
Hide file tree
Showing 16 changed files with 127 additions and 284 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,11 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "(Clang|GNU|Intel)")
- Debug: best/complete debugging experience; as optimized as reasonable\n\
- Release: full optimization; some runtime checks disabled\n\
- RelWithAssert: full optimization; runtime checks enabled\n\
- RelWithDebInfo: optimized; debug info; some runtime checks disabled"
- RelWithDebInfo: optimized; debug info; some runtime checks disabled\n\
- MinSizeRel: optimized with a focus on minimal code size"
FORCE)
endif (NOT CMAKE_BUILD_TYPE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS None Debug Release RelWithAssert RelWithDebInfo MinSizeRel)

if (CMAKE_BUILD_TYPE STREQUAL "" AND NOT CMAKE_CXX_FLAGS MATCHES "-O[123gs]")
message(WARNING "It seems you are compiling without optimization. Please set CMAKE_BUILD_TYPE or CMAKE_CXX_FLAGS.")
Expand Down Expand Up @@ -218,7 +220,7 @@ FetchContent_Declare(
FetchContent_Declare(
vir-simd
GIT_REPOSITORY https://github.com/mattkretz/vir-simd.git
GIT_TAG v0.2.0
GIT_TAG v0.4.0
)

FetchContent_Declare(
Expand Down
3 changes: 2 additions & 1 deletion blocks/basic/test/qa_Converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ const boost::ut::suite<"basic Conversion tests"> basicConversion = [] {

"up-convert std::simd<std::uint8_t> to ..."_test = []<typename R>(R /*noop*/) {
using T = uint8_t;
using V = stdx::native_simd<T>;
// careful here: max_fixed_size is 32 *except with AVX-512 and sizeof(T) == 1* where it is 64.
using V = std::conditional_t<stdx::native_simd<T>::size() <= stdx::simd_abi::max_fixed_size<R>, stdx::native_simd<T>, stdx::simd<T, stdx::simd_abi::deduce_t<T, stdx::simd_abi::max_fixed_size<R>>>>;
using RetType = stdx::rebind_simd_t<R, V>;

using TConvert = std::conditional_t<kIsScalingBlock, ScalingConvert<T, R>, Convert<T, R>>;
Expand Down
9 changes: 2 additions & 7 deletions blocks/testing/include/gnuradio-4.0/testing/NullSources.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,8 @@ Commonly used for testing scenarios and signal termination where output is unnec

void reset() { count = 0U; }

template<gr::meta::t_or_simd<T> V>
void processOne(V) noexcept {
if constexpr (stdx::is_simd_v<V>) {
count += V::size();
} else {
count++;
}
void processOne(T) noexcept {
count++;
if (n_samples_max > 0 && count >= n_samples_max) {
this->requestStop();
}
Expand Down
41 changes: 5 additions & 36 deletions blocks/testing/include/gnuradio-4.0/testing/TagMonitors.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ namespace gr::testing {

enum class ProcessFunction {
USE_PROCESS_BULK = 0, ///
USE_PROCESS_ONE = 1, ///
USE_PROCESS_ONE_SIMD = 2 ///
USE_PROCESS_ONE = 1 ///
};

inline constexpr void print_tag(const Tag& tag, std::string_view prefix = {}) noexcept {
Expand Down Expand Up @@ -119,10 +118,10 @@ struct TagSource : public Block<TagSource<T, UseProcessVariant>> {
}
}

T processOne(std::size_t offset) noexcept
T processOne() noexcept
requires(UseProcessVariant == ProcessFunction::USE_PROCESS_ONE)
{
const auto [tagGenerated, tagRepeatStarted] = generateTag("processOne(...)", offset);
const auto [tagGenerated, tagRepeatStarted] = generateTag("processOne(...)");
_nSamplesProduced++;
if (!isInfinite() && _nSamplesProduced >= n_samples_max) {
this->requestStop();
Expand Down Expand Up @@ -192,7 +191,7 @@ struct TagSource : public Block<TagSource<T, UseProcessVariant>> {
}

private:
[[nodiscard]] auto generateTag(std::string_view processFunctionName, std::size_t offset = 0) {
[[nodiscard]] auto generateTag(std::string_view processFunctionName) {
struct {
bool tagGenerated = false;
bool tagRepeatStarted = false;
Expand All @@ -203,7 +202,7 @@ struct TagSource : public Block<TagSource<T, UseProcessVariant>> {
if (verbose_console) {
print_tag(_tags[_tagIndex], fmt::format("{}::{}\t publish tag at {:6}", this->name.value, processFunctionName, _nSamplesProduced));
}
out.publishTag(_tags[_tagIndex].map, static_cast<Tag::signed_index_type>(offset)); // indices > 0 write tags in the future ... handle with care
out.publishTag(_tags[_tagIndex].map, static_cast<Tag::signed_index_type>(0)); // indices > 0 write tags in the future ... handle with care
this->_outputTagsChanged = true;
_tagIndex++;
if (repeat_tags && _tagIndex == _tags.size()) {
Expand Down Expand Up @@ -270,36 +269,6 @@ struct TagMonitor : public Block<TagMonitor<T, UseProcessVariant>> {
return input;
}

template<gr::meta::t_or_simd<T> V>
[[nodiscard]] constexpr V processOne(const V& input) noexcept // to note: the SIMD-version does not support adding tags mid-way since this is chunked at V::size()
requires(UseProcessVariant == ProcessFunction::USE_PROCESS_ONE_SIMD)
{
if (this->input_tags_present()) {
const Tag& tag = this->mergedInputTag();
if (verbose_console) {
print_tag(tag, fmt::format("{}::processOne(...)\t received tag at {:6}", this->name, _nSamplesProduced));
}
if (log_tags) {
_tags.emplace_back(_nSamplesProduced, tag.map);
}
}
if (log_samples) {
if constexpr (gr::meta::any_simd<V>) {
alignas(stdx::memory_alignment_v<stdx::native_simd<T>>) std::array<T, V::size()> mem = {};
input.copy_to(&mem[0], stdx::vector_aligned);
_samples.insert(_samples.end(), mem.begin(), mem.end());
} else {
_samples.emplace_back(input);
}
}
if constexpr (gr::meta::any_simd<V>) {
_nSamplesProduced += static_cast<gr::Size_t>(V::size());
} else {
_nSamplesProduced++;
}
return input;
}

constexpr work::Status processBulk(std::span<const T> input, std::span<T> output) noexcept
requires(UseProcessVariant == ProcessFunction::USE_PROCESS_BULK)
{
Expand Down
13 changes: 4 additions & 9 deletions blocks/testing/include/gnuradio-4.0/testing/bm_test_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ struct source : public gr::Block<source<T, min, count>> {
}

[[nodiscard]] constexpr auto
processOne_simd(auto N) const noexcept -> gr::meta::simdize<T, decltype(N)::value> {
processOne_simd(auto N) const noexcept -> vir::simdize<T, decltype(N)::value> {
n_samples_produced += N;
gr::meta::simdize<T, N> x{};
vir::simdize<T, N> x{};
benchmark::force_to_memory(x);
return x;
}
Expand All @@ -51,9 +51,8 @@ struct sink : public gr::Block<sink<T, N_MIN, N_MAX>> {
uint64_t should_receive_n_samples = 0;
int64_t _last_tag_position = -1;

template<gr::meta::t_or_simd<T> V>
[[nodiscard]] constexpr auto
processOne(V a) noexcept {
processOne(T a) noexcept {
// optional user-level tag processing
if (this->input_tags_present()) {
if (this->input_tags_present() && this->mergedInputTag().map.contains("N_SAMPLES_MAX")) {
Expand All @@ -65,11 +64,7 @@ struct sink : public gr::Block<sink<T, N_MIN, N_MAX>> {
}
}

if constexpr (gr::meta::any_simd<V>) {
n_samples_consumed += V::size();
} else {
n_samples_consumed++;
}
n_samples_consumed++;
benchmark::force_store(a);
}
};
Expand Down
5 changes: 0 additions & 5 deletions core/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,3 @@ add_gr_benchmark(bm_Scheduler)
add_gr_benchmark(bm-nosonar_node_api)
add_gr_benchmark(bm_fft)
target_link_libraries(bm_fft PRIVATE gr-fourier)

add_executable(bm-nosonar_node_api_nosimd bm-nosonar_node_api.cpp)
append_compiler_flags(bm-nosonar_node_api_nosimd)
target_compile_options(bm-nosonar_node_api_nosimd PRIVATE -DDISABLE_SIMD=1)
target_link_libraries(bm-nosonar_node_api_nosimd PRIVATE gnuradio-options gnuradio-core refl-cpp fmt ut ut-benchmark gr-basic gr-math gr-testing)
17 changes: 8 additions & 9 deletions core/benchmarks/bm-nosonar_node_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ struct math_bulk_op : public gr::Block<math_bulk_op<T, op>, gr::PortInNamed<T, "
// C++20 ranges
// std::ranges::transform(input, output.begin(), [this](const T& elem) { return processOne(elem); });

// vir-simd execution policy
// vir::transform(vir::execution::simd, input, output, [this](const auto &elem) {
// return processOne(elem);
// });

return gr::work::Status::OK;
}
};
Expand Down Expand Up @@ -325,16 +330,9 @@ void loop_over_processOne(auto& node) {
using namespace benchmark;
bm::test::n_samples_produced = 0LU;
bm::test::n_samples_consumed = 0LU;
#if DISABLE_SIMD
for (std::size_t i = 0; i < N_SAMPLES; i++) {
node.processOne(i);
}
#else
constexpr int N = 32;
for (std::size_t i = 0; i < N_SAMPLES / N; i++) {
node.template processOne_simd(i, std::integral_constant<std::size_t, N>{});
node.processOne();
}
#endif
expect(eq(bm::test::n_samples_produced, N_SAMPLES)) << "produced too many/few samples";
expect(eq(bm::test::n_samples_consumed, N_SAMPLES)) << "consumed too many/few samples";
}
Expand Down Expand Up @@ -367,7 +365,8 @@ inline const boost::ut::suite _constexpr_bm = [] {
auto mergedBlock = merge<"out", "in">(merge<"out", "in">(bm::test::source<float>({{"n_samples_max", N_SAMPLES}}), copy<float>()), bm::test::sink<float>());
#if !DISABLE_SIMD
static_assert(gr::traits::block::can_processOne_simd<copy<float>>);
static_assert(gr::traits::block::can_processOne_simd<bm::test::sink<float>>);
// bm::test::sink cannot process SIMD because it wants to be non-const
static_assert(not gr::traits::block::can_processOne_simd<bm::test::sink<float>>);
#endif
"merged src->copy->sink"_benchmark.repeat<N_ITER>(N_SAMPLES) = [&mergedBlock]() { loop_over_processOne(mergedBlock); };
"merged src->copy->sink work"_benchmark.repeat<N_ITER>(N_SAMPLES) = [&mergedBlock]() { loop_over_work(mergedBlock); };
Expand Down
Loading

0 comments on commit 4bb2740

Please sign in to comment.