Skip to content

Commit

Permalink
[SYCL][ESIMD][Doc] Add invoke_simd example (#10052)
Browse files Browse the repository at this point in the history
This change adds an invoke_simd example that scales the input data and
updates related doc to point to it.

---------

Signed-off-by: Sarnie, Nick <nick.sarnie@intel.com>
  • Loading branch information
sarnex authored Jun 23, 2023
1 parent 75a6d09 commit e860b14
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 1 deletion.
4 changes: 4 additions & 0 deletions sycl/doc/extensions/supported/sycl_ext_intel_esimd/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ the same application.
### SYCL and ESIMD interoperability

SYCL kernels can call ESIMD functions using the special `invoke_simd` API.

More examples are available [here](./examples/)

More details are available in [invoke_simd spec](../../experimental/sycl_ext_oneapi_invoke_simd.asciidoc)

Test cases are available [here](../../../../test-e2e/InvokeSimd/)

```cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,43 @@ is to show the basic ESIMD APIs in well known examples.
});
}).wait_and_throw();
```
2) Calling ESIMD from SYCL using invoke_simd - ["invoke_simd"](./invoke_simd.md).
Please see the full source code here: ["invoke_simd"](./invoke_simd.md)
```c++
[[intel::device_indirectly_callable]] simd<int, VL> __regcall scale(
simd<int, VL> x, int n) SYCL_ESIMD_FUNCTION {
esimd::simd<int, VL> vec = x;
esimd::simd<int, VL> result = vec * n;
return result;
}
int main(void) {
int *in = new int[SIZE];
int *out = new int[SIZE];
buffer<int, 1> bufin(in, range<1>(SIZE));
buffer<int, 1> bufout(out, range<1>(SIZE));
// scale factor
int n = 2;
sycl::range<1> GlobalRange{SIZE};
sycl::range<1> LocalRange{VL};
q.submit([&](handler &cgh) {
auto accin = bufin.get_access<access::mode::read>(cgh);
auto accout = bufout.get_access<access::mode::write>(cgh);
cgh.parallel_for<class Scale>(
nd_range<1>(GlobalRange, LocalRange), [=](nd_item<1> item) {
sycl::sub_group sg = item.get_sub_group();
unsigned int offset = item.get_global_linear_id();
int in_val = sg.load(accin.get_pointer() + offset);
int out_val = invoke_simd(sg, scale, in_val, uniform{n});
2) TODO: Add more examples here.
sg.store(accout.get_pointer() + offset, out_val);
});
});
```
3) TODO: Add more examples here.
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
## Call ESIMD from SYCL using invoke_simd

In this example, we will scale the input data by a factor of 2 using `invoke_simd`.

Compile and run:
```bash
> clang++ -fsycl -fno-sycl-device-code-split-esimd -Xclang -fsycl-allow-func-ptr invoke_simd.cpp

> IGC_VCSaveStackCallLinkage=1 IGC_VCDirectCallsOnly=1 ONEAPI_DEVICE_SELECTOR=level_zero:gpu ./a.out
Running on Intel(R) UHD Graphics 630
Passed
```
Source code:
```c++
#include <sycl/ext/intel/esimd.hpp>
#include <sycl/ext/oneapi/experimental/invoke_simd.hpp>
#include <sycl/sycl.hpp>
#include <iostream>

using namespace sycl;
using namespace sycl::ext::oneapi::experimental;
namespace esimd = sycl::ext::intel::esimd;

constexpr int SIZE = 512;
constexpr int VL = 16;

[[intel::device_indirectly_callable]] simd<int, VL> __regcall scale(
simd<int, VL> x, int n) SYCL_ESIMD_FUNCTION {
esimd::simd<int, VL> vec = x;
esimd::simd<int, VL> result = vec * n;
return result;
}

int main(void) {
auto q = queue{gpu_selector_v};
auto dev = q.get_device();
std::cout << "Running on " << dev.get_info<sycl::info::device::name>()
<< "\n";
bool passed = true;
int *in = new int[SIZE];
int *out = new int[SIZE];

for (int i = 0; i < SIZE; ++i) {
in[i] = i;
out[i] = 0;
}

// scale factor
int n = 2;

try {
buffer<int, 1> bufin(in, range<1>(SIZE));
buffer<int, 1> bufout(out, range<1>(SIZE));

sycl::range<1> GlobalRange{SIZE};
sycl::range<1> LocalRange{VL};

auto e = q.submit([&](handler &cgh) {
auto accin = bufin.get_access<access::mode::read>(cgh);
auto accout = bufout.get_access<access::mode::write>(cgh);

cgh.parallel_for<class Scale>(
nd_range<1>(GlobalRange, LocalRange), [=](nd_item<1> item) {
sycl::sub_group sg = item.get_sub_group();
unsigned int offset = item.get_global_linear_id();

int in_val = sg.load(accin.get_pointer() + offset);

int out_val = invoke_simd(sg, scale, in_val, uniform{n});

sg.store(accout.get_pointer() + offset, out_val);
});
});
e.wait();
} catch (sycl::exception const &e) {
delete[] in;
delete[] out;
std::cout << "SYCL exception caught: " << e.what() << '\n';
return 1;
}

for (int i = 0; i < SIZE; ++i) {
if (out[i] != in[i] * n) {
std::cout << "failed at index " << i << ", " << out[i] << " != " << in[i]
<< " * " << n << "\n";
passed = false;
}
}
delete[] in;
delete[] out;
std::cout << (passed ? "Passed\n" : "FAILED\n");
return passed ? 0 : 1;
}

```

0 comments on commit e860b14

Please sign in to comment.