diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_esimd/README.md b/sycl/doc/extensions/supported/sycl_ext_intel_esimd/README.md index 9241a5231f6f0..61b1b09b90e61 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_esimd/README.md +++ b/sycl/doc/extensions/supported/sycl_ext_intel_esimd/README.md @@ -84,7 +84,11 @@ the same application. ### SYCL and ESIMD interoperability SYCL kernels can call ESIMD functions using the special `invoke_simd` API. + +More examples are available [here](./examples/) + More details are available in [invoke_simd spec](../../experimental/sycl_ext_oneapi_invoke_simd.asciidoc) + Test cases are available [here](../../../../test-e2e/InvokeSimd/) ```cpp diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/README.md b/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/README.md index 917752f03df50..a56ea4b1d7dfd 100644 --- a/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/README.md +++ b/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/README.md @@ -26,5 +26,43 @@ is to show the basic ESIMD APIs in well known examples. }); }).wait_and_throw(); ``` +2) Calling ESIMD from SYCL using invoke_simd - ["invoke_simd"](./invoke_simd.md). + Please see the full source code here: ["invoke_simd"](./invoke_simd.md) + ```c++ + [[intel::device_indirectly_callable]] simd __regcall scale( + simd x, int n) SYCL_ESIMD_FUNCTION { + esimd::simd vec = x; + esimd::simd result = vec * n; + return result; + } + + int main(void) { + int *in = new int[SIZE]; + int *out = new int[SIZE]; + buffer bufin(in, range<1>(SIZE)); + buffer bufout(out, range<1>(SIZE)); + + // scale factor + int n = 2; + + sycl::range<1> GlobalRange{SIZE}; + sycl::range<1> LocalRange{VL}; + + q.submit([&](handler &cgh) { + auto accin = bufin.get_access(cgh); + auto accout = bufout.get_access(cgh); + + cgh.parallel_for( + nd_range<1>(GlobalRange, LocalRange), [=](nd_item<1> item) { + sycl::sub_group sg = item.get_sub_group(); + unsigned int offset = item.get_global_linear_id(); + + int in_val = sg.load(accin.get_pointer() + offset); + + int out_val = invoke_simd(sg, scale, in_val, uniform{n}); -2) TODO: Add more examples here. + sg.store(accout.get_pointer() + offset, out_val); + }); + }); + ``` +3) TODO: Add more examples here. diff --git a/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/invoke_simd.md b/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/invoke_simd.md new file mode 100644 index 0000000000000..d922fa39e949d --- /dev/null +++ b/sycl/doc/extensions/supported/sycl_ext_intel_esimd/examples/invoke_simd.md @@ -0,0 +1,95 @@ +## Call ESIMD from SYCL using invoke_simd + +In this example, we will scale the input data by a factor of 2 using `invoke_simd`. + +Compile and run: +```bash +> clang++ -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr invoke_simd.cpp + +> IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 ONEAPI_DEVICE_SELECTOR=level_zero:gpu ./a.out +Running on Intel(R) UHD Graphics 630 +Passed +``` +Source code: +```c++ +#include +#include +#include +#include + +using namespace sycl; +using namespace sycl::ext::oneapi::experimental; +namespace esimd = sycl::ext::intel::esimd; + +constexpr int SIZE = 512; +constexpr int VL = 16; + +[[intel::device_indirectly_callable]] simd __regcall scale( + simd x, int n) SYCL_ESIMD_FUNCTION { + esimd::simd vec = x; + esimd::simd result = vec * n; + return result; +} + +int main(void) { + auto q = queue{gpu_selector_v}; + auto dev = q.get_device(); + std::cout << "Running on " << dev.get_info() + << "\n"; + bool passed = true; + int *in = new int[SIZE]; + int *out = new int[SIZE]; + + for (int i = 0; i < SIZE; ++i) { + in[i] = i; + out[i] = 0; + } + + // scale factor + int n = 2; + + try { + buffer bufin(in, range<1>(SIZE)); + buffer bufout(out, range<1>(SIZE)); + + sycl::range<1> GlobalRange{SIZE}; + sycl::range<1> LocalRange{VL}; + + auto e = q.submit([&](handler &cgh) { + auto accin = bufin.get_access(cgh); + auto accout = bufout.get_access(cgh); + + cgh.parallel_for( + nd_range<1>(GlobalRange, LocalRange), [=](nd_item<1> item) { + sycl::sub_group sg = item.get_sub_group(); + unsigned int offset = item.get_global_linear_id(); + + int in_val = sg.load(accin.get_pointer() + offset); + + int out_val = invoke_simd(sg, scale, in_val, uniform{n}); + + sg.store(accout.get_pointer() + offset, out_val); + }); + }); + e.wait(); + } catch (sycl::exception const &e) { + delete[] in; + delete[] out; + std::cout << "SYCL exception caught: " << e.what() << '\n'; + return 1; + } + + for (int i = 0; i < SIZE; ++i) { + if (out[i] != in[i] * n) { + std::cout << "failed at index " << i << ", " << out[i] << " != " << in[i] + << " * " << n << "\n"; + passed = false; + } + } + delete[] in; + delete[] out; + std::cout << (passed ? "Passed\n" : "FAILED\n"); + return passed ? 0 : 1; +} + +```