Skip to content

Commit

Permalink
Set file flag only.
Browse files Browse the repository at this point in the history
  • Loading branch information
zwd-ms committed Nov 10, 2022
1 parent 2ea0420 commit 358b33b
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 14 deletions.
7 changes: 2 additions & 5 deletions cmake/VWFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,13 @@ if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
endif()
endif()

# TODO: Hide this behind some compile flag.
set(LINUX_AVX512_FLAGS -mfma -mavx512f -mavx512bw -mavx512vl -mavx512vpopcntdq)

# Add -ffast-math for speed, remove for testability.
# no-stack-check is added to mitigate stack alignment issue on Catalina where there is a bug with aligning stack-check instructions, and stack-check became default option
set(LINUX_RELEASE_CONFIG -fno-strict-aliasing ${LINUX_X86_64_OPT_FLAGS} -fno-stack-check -fomit-frame-pointer)
set(LINUX_DEBUG_CONFIG -fno-stack-check)

#Use default visiblity on UNIX otherwise a lot of the C++ symbols end up for exported and interpose'able
set(VW_LINUX_FLAGS $<$<CONFIG:Debug>:${LINUX_DEBUG_CONFIG}> $<$<CONFIG:Release>:${LINUX_RELEASE_CONFIG}> $<$<CONFIG:RelWithDebInfo>:${LINUX_RELEASE_CONFIG}> ${LINUX_AVX512_FLAGS})
# Use default visiblity on UNIX otherwise a lot of the C++ symbols end up for exported and interpose'able
set(VW_LINUX_FLAGS $<$<CONFIG:Debug>:${LINUX_DEBUG_CONFIG}> $<$<CONFIG:Release>:${LINUX_RELEASE_CONFIG}> $<$<CONFIG:RelWithDebInfo>:${LINUX_RELEASE_CONFIG}>)
set(VW_WIN_FLAGS /MP /Zc:__cplusplus)

# Turn on warnings
Expand Down
6 changes: 2 additions & 4 deletions test/benchmarks/standalone/benchmark_text_input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,7 @@ BENCHMARK_CAPTURE(benchmark_multi_predict, cb_las_300actions_20features_1thread,
->UseRealTime()
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(benchmark_multi_predict, cb_las_300actions,
gen_cb_examples(1, 50, 10, 300, 5, 5, 20, 10, false),
BENCHMARK_CAPTURE(benchmark_multi_predict, cb_las_300actions, gen_cb_examples(1, 50, 10, 300, 5, 5, 20, 10, false),
"--cb_explore_adf --large_action_space -q :: --max_actions 20 --quiet")
->MinTime(15.0)
->UseRealTime()
Expand All @@ -313,8 +312,7 @@ BENCHMARK_CAPTURE(benchmark_multi_predict, cb_las_300actions_plaincb,
->UseRealTime()
->Unit(benchmark::kMillisecond);

BENCHMARK_CAPTURE(benchmark_multi_predict, cb_las_500actions,
gen_cb_examples(1, 50, 10, 500, 5, 5, 20, 10, false),
BENCHMARK_CAPTURE(benchmark_multi_predict, cb_las_500actions, gen_cb_examples(1, 50, 10, 500, 5, 5, 20, 10, false),
"--cb_explore_adf --large_action_space -q :: --max_actions 20 --quiet")
->MinTime(15.0)
->UseRealTime()
Expand Down
3 changes: 3 additions & 0 deletions test/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ add_executable(vw-unit-test.out
weights_test.cc
)

# TODO: Hide behind some compile flag.
set_source_files_properties( cb_las_one_pass_svd_test.cc PROPERTIES COMPILE_FLAGS "-mfma -mavx512f -mavx512bw -mavx512vl -mavx512vpopcntdq" )

# Add the include directories from vw target for testing
target_link_libraries(vw-unit-test.out PRIVATE vw_core Boost::unit_test_framework)
target_include_directories(vw-unit-test.out PRIVATE $<TARGET_PROPERTY:vw_core,INCLUDE_DIRECTORIES>)
Expand Down
44 changes: 41 additions & 3 deletions test/unit_test/cb_las_one_pass_svd_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// individual contributors. All rights reserved. Released under a BSD (revised)
// license as described in the file LICENSE.

#include "reductions/cb/details/large_action/compute_dot_prod_simd.h"
#include "reductions/cb/details/large_action_space.h"
#include "test_common.h"
#include "vw/core/qr_decomposition.h"
Expand Down Expand Up @@ -91,11 +92,11 @@ BOOST_AUTO_TEST_CASE(check_AO_linear_combination_of_actions)

vws.push_back(vw_rs);

auto* vw_zero_threads_zs = VW::initialize("--cb_explore_adf --large_action_space --max_actions " + std::to_string(d) +
auto* vw_zs = VW::initialize("--cb_explore_adf --large_action_space --max_actions " + std::to_string(d) +
" --quiet --random_seed 0 --noconstant",
nullptr, false, nullptr, nullptr);

vws.push_back(vw_zero_threads_zs);
vws.push_back(vw_zs);

for (auto* vw_ptr : vws)
{
Expand Down Expand Up @@ -174,6 +175,43 @@ BOOST_AUTO_TEST_CASE(check_AO_linear_combination_of_actions)
}
}

// TODO: Add tests for vectorized implementation.
// TODO: Hide behind some compile flag.
BOOST_AUTO_TEST_CASE(compute16_has_same_results_with_16_compute1)
{
constexpr uint64_t column_index = 666;
constexpr uint64_t seed = 233;
constexpr uint64_t weights_len = (1 << 18);
constexpr uint64_t weights_mask = weights_len - 1;
constexpr uint64_t offset = 4;

constexpr int num_features = 16;
std::vector<uint64_t> feature_indices(num_features);
std::vector<float> feature_values(num_features);
for (int i = 0; i < num_features; ++i)
{
feature_indices[i] = rand() % weights_len;
feature_values[i] = rand() / static_cast<float>(RAND_MAX);
}

std::vector<float> expected_results(num_features, 0.f);
for (int i = 0; i < num_features; ++i)
{
VW::cb_explore_adf::compute1(
feature_values[i], feature_indices[i], offset, weights_mask, column_index, seed, expected_results[i]);
}

__m512 sums = _mm512_setzero_ps();
const __m512i column_indices = _mm512_set1_epi64(column_index);
const __m512i seeds = _mm512_set1_epi64(seed);
const __m512i weights_masks = _mm512_set1_epi64(weights_mask);
const __m512i offsets = _mm512_set1_epi64(offset);
const __m512 values = _mm512_loadu_ps(&feature_values[0]);
const __m512i indices1 = _mm512_loadu_si512(&feature_indices[0]);
const __m512i indices2 = _mm512_loadu_si512(&feature_indices[8]);
VW::cb_explore_adf::compute16(values, indices1, indices2, offsets, weights_masks, column_indices, seeds, sums);

const float* results = reinterpret_cast<float*>(&sums);
for (int i = 0; i < num_features; ++i) { BOOST_CHECK_CLOSE(results[i], expected_results[i], FLOAT_TOL); }
}

BOOST_AUTO_TEST_SUITE_END()
3 changes: 3 additions & 0 deletions vowpalwabbit/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,9 @@ set(vw_core_sources
src/vw_validate.cc
)

# TODO: Hide behind some compile flag.
set_source_files_properties( src/reductions/cb/details/large_action/one_pass_svd_impl.cc PROPERTIES COMPILE_FLAGS "-mfma -mavx512f -mavx512bw -mavx512vl -mavx512vpopcntdq" )

vw_add_library(
NAME "core"
TYPE "STATIC_ONLY"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ void one_pass_svd_impl::generate_AOmega(const multi_ex& examples, const std::vec
#ifdef __linux__
// Only works for linux for now.
// TODO: Expose this parameter.
bool use_simd = false;
bool use_simd = true;
auto compute_dot_prod = use_simd ? compute_dot_prod_simd : compute_dot_prod_scalar;
#else
// TODO: Make simd work with msvc.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "vw/core/v_array.h"
#include "vw/core/vw_fwd.h"

// Eigen explicit vectorization does not work with smaller ALIGN_BYTES.
// Eigen explicit vectorization does not work with AVX512 when using smaller MAX_ALIGN_BYTES. For more info:
// https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html#TopicPreprocessorDirectivesPerformance
#define EIGEN_MAX_ALIGN_BYTES 32

Expand Down

0 comments on commit 358b33b

Please sign in to comment.