Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/SYCLomatic' into blas2
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiweij1 committed Sep 19, 2024
2 parents 1eee195 + 94d5aad commit 30a1773
Show file tree
Hide file tree
Showing 19 changed files with 520 additions and 199 deletions.
2 changes: 1 addition & 1 deletion clang/lib/DPCT/AnalysisInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2906,7 +2906,7 @@ void MemVarInfo::newConstVarInit(const VarDecl *Var) {
std::string MemVarInfo::getDeclarationReplacement(const VarDecl *VD) {
switch (Scope) {
case clang::dpct::MemVarInfo::Local:
if (DpctGlobalInfo::useGroupLocalMemory() && VD) {
if (isShared() && DpctGlobalInfo::useGroupLocalMemory() && VD) {

auto FD = dyn_cast<FunctionDecl>(VD->getDeclContext());
if (FD && FD->hasAttr<CUDADeviceAttr>())
Expand Down
6 changes: 5 additions & 1 deletion clang/lib/DPCT/DPCT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1335,7 +1335,11 @@ int runDPCT(int argc, const char **argv) {
if (DpctGlobalInfo::getBuildScript() == BuildScriptKind::BS_Cmake) {
loadMainSrcFileInfo(OutRootPath);
collectCmakeScripts(InRootPath, OutRootPath);
doCmakeScriptMigration(InRootPath, OutRootPath);
runWithCrashGuard(
[&]() { doCmakeScriptMigration(InRootPath, OutRootPath); },
"Error: dpct internal error. Migrating CMake scripts in \"" +
InRootPath.getCanonicalPath().str() +
"\" causing the error skipped. Migration continues.\n");

if (cmakeScriptNotFound()) {
std::cout << CmakeScriptMigrationHelpHint << "\n";
Expand Down
19 changes: 12 additions & 7 deletions clang/lib/DPCT/Rewriters/Math/RewriterHalf2ArithmeticFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,13 +236,18 @@ RewriterMap dpct::createHalf2ArithmeticFunctionsRewriterMap() {
LITERAL("{0.f, 0.f}"), LITERAL("{1.f, 1.f}"))))),
MATH_API_REWRITER_EXPERIMENTAL_BFLOAT16(
"__hfma2_sat",
CALL_FACTORY_ENTRY(
"__hfma2_sat",
CALL(MapNames::getDpctNamespace() + "clamp",
CALL(MapNames::getClNamespace(false, true) +
"ext::oneapi::experimental::fma",
ARG(0), ARG(1), ARG(2)),
LITERAL("{0.f, 0.f}"), LITERAL("{1.f, 1.f}"))),
CONDITIONAL_FACTORY_ENTRY(
UseSYCLCompat,
UNSUPPORT_FACTORY_ENTRY("__hfma2_sat",
Diagnostics::UNSUPPORT_SYCLCOMPAT,
LITERAL("__hfma2_sat")),
CALL_FACTORY_ENTRY(
"__hfma2_sat",
CALL(MapNames::getDpctNamespace() + "clamp",
CALL(MapNames::getClNamespace(false, true) +
"ext::oneapi::experimental::fma",
ARG(0), ARG(1), ARG(2)),
LITERAL("{0.f, 0.f}"), LITERAL("{1.f, 1.f}")))),
CONDITIONAL_FACTORY_ENTRY(
UseSYCLCompat,
UNSUPPORT_FACTORY_ENTRY("__hfma2_sat",
Expand Down
416 changes: 272 additions & 144 deletions clang/lib/DPCT/Rewriters/Math/RewriterSIMDIntrinsics.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions clang/lib/DPCT/TypeLocRewriters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ void initTypeLocSYCLCompatRewriterMap(
SYCLCOMPAT_UNSUPPORT("cudaGraphNode_t")
SYCLCOMPAT_UNSUPPORT("cudaGraphicsResource")
SYCLCOMPAT_UNSUPPORT("cudaGraphicsResource_t")
SYCLCOMPAT_UNSUPPORT("thrust::system::cuda::experimental::pinned_allocator")
SYCLCOMPAT_UNSUPPORT("thrust::cuda::experimental::pinned_allocator")
SYCLCOMPAT_UNSUPPORT("thrust::device_allocator")

#undef SYCLCOMPAT_UNSUPPORT
}
Expand Down
7 changes: 6 additions & 1 deletion clang/lib/Sema/SemaHLSL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -572,9 +572,14 @@ class DiagnoseHLSLAvailability

// Do not access these directly, use the get/set methods below to make
// sure the values are in sync
#ifdef SYCLomatic_CUSTOMIZATION
llvm::Triple::EnvironmentType CurrentShaderEnvironment =
llvm::Triple::EnvironmentType::UnknownEnvironment;
unsigned CurrentShaderStageBit = 0;
#else
llvm::Triple::EnvironmentType CurrentShaderEnvironment;
unsigned CurrentShaderStageBit;

#endif
// True if scanning a function that was already scanned in a different
// shader stage context, and therefore we should not report issues that
// depend only on shader model version because they would be duplicate.
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6356,7 +6356,7 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
ArrayRef<TemplateArgument> A = TAL->asArray();
bool FirstParam = true;
O << "<";
for (auto X : A) {
for (const auto &X : A) {
if (FirstParam)
FirstParam = false;
else
Expand Down
5 changes: 2 additions & 3 deletions clang/runtime/dpct-rt/include/dpct/bindless_images.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,7 @@ static inline std::vector<sycl::event> dpct_memcpy_to_host(
sycl::range<3>(w_offset_src / ele_size, h_offset_src, 0);
const auto dest_offset = sycl::range<3>(offset_dest / ele_size, 0, 0);
const auto dest_extend = sycl::range<3>(0, 0, 0);
const auto copy_extend = sycl::range<3>((s - w_offset_src) / ele_size, 1, 0);
const auto copy_extend = sycl::range<3>((s - offset_dest) / ele_size, 1, 0);
event_list.push_back(q.ext_oneapi_copy(src, src_offset, desc_src,
dest_host_ptr, dest_offset,
dest_extend, copy_extend));
Expand Down Expand Up @@ -691,8 +691,7 @@ static inline std::vector<sycl::event> dpct_memcpy_from_host(
const auto src_extend = sycl::range<3>(0, 0, 0);
const auto dest_offset =
sycl::range<3>(w_offset_dest / ele_size, h_offset_dest, 0);
const auto copy_extend =
sycl::range<3>((s - offset_src - w_offset_dest) / ele_size, 1, 0);
const auto copy_extend = sycl::range<3>((s - offset_src) / ele_size, 1, 0);
// TODO: Remove const_cast after refining the signature of ext_oneapi_copy.
event_list.push_back(q.ext_oneapi_copy(const_cast<void *>(src_host_ptr),
src_offset, src_extend, dest,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ json_stringstream::json_obj::value<json_stringstream::json_obj>() {
return js.object();
}

template <typename T> std::string demangle_name() {
template <typename T> inline std::string demangle_name() {
std::string ret_str = "";
#if defined(__linux__)
int s;
Expand All @@ -197,11 +197,11 @@ template <typename T> std::string demangle_name() {
}

#ifdef __NVCC__
template <> std::string demangle_name<__half>() { return "fp16"; }
template <> std::string demangle_name<__nv_bfloat16>() { return "bf16"; }
template <> inline std::string demangle_name<__half>() { return "fp16"; }
template <> inline std::string demangle_name<__nv_bfloat16>() { return "bf16"; }
#else
template <> std::string demangle_name<sycl::half>() { return "fp16"; }
template <> std::string demangle_name<sycl::ext::oneapi::bfloat16>() {
template <> inline std::string demangle_name<sycl::half>() { return "fp16"; }
template <> inline std::string demangle_name<sycl::ext::oneapi::bfloat16>() {
return "bf16";
}
#endif
Expand Down
8 changes: 7 additions & 1 deletion clang/runtime/dpct-rt/include/dpct/image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,14 +507,20 @@ class sampling_info {

public:
sycl::addressing_mode get_addressing_mode() const noexcept {
// Make sure the return value is legal addressing_mode when using memset.
if ((unsigned)_addressing_mode == 0)
return sycl::addressing_mode::clamp_to_edge;
return _addressing_mode;
}
void set(sycl::addressing_mode addressing_mode) noexcept {
_addressing_mode = addressing_mode;
}

sycl::filtering_mode get_filtering_mode() const noexcept {
return _filtering_mode;
// Make sure the return value is legal filtering_mode when using memset.
return _filtering_mode == sycl::filtering_mode::linear
? sycl::filtering_mode::linear
: sycl::filtering_mode::nearest;
}
void set(sycl::filtering_mode filtering_mode) noexcept {
_filtering_mode = filtering_mode;
Expand Down
4 changes: 2 additions & 2 deletions clang/runtime/dpct-rt/include/dpct/lapack_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ inline int potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, int n,
&has_execption, api_name, queue, nullptr, info,
matrix_info->group_size_info, oneapi::mkl::lapack::potrs_batch, queue,
&(matrix_info->uplo_info), &(matrix_info->n_info),
&(matrix_info->nrhs_info), (Ty **)a, &(matrix_info->lda_info), (Ty **)b,
&(matrix_info->ldb_info), (std::int64_t)1,
&(matrix_info->nrhs_info), (const Ty *const *)a, &(matrix_info->lda_info),
(Ty **)b, &(matrix_info->ldb_info), (std::int64_t)1,
&(matrix_info->group_size_info), (Ty *)scratchpad,
(std::int64_t)scratchpad_size, empty_events);

Expand Down
5 changes: 3 additions & 2 deletions clang/runtime/dpct-rt/include/dpct/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,8 +740,9 @@ inline unsigned vectorized_unary(unsigned a, const UnaryOperation unary_op) {
template <typename VecT>
inline unsigned vectorized_sum_abs_diff(unsigned a, unsigned b) {
sycl::vec<unsigned, 1> v0{a}, v1{b};
auto v2 = v0.as<VecT>();
auto v3 = v1.as<VecT>();
// Need convert element type to wider signed type to avoid overflow.
auto v2 = v0.as<VecT>().template convert<int>();
auto v3 = v1.as<VecT>().template convert<int>();
auto v4 = sycl::abs_diff(v2, v3);
unsigned sum = 0;
for (size_t i = 0; i < v4.size(); ++i) {
Expand Down
38 changes: 38 additions & 0 deletions clang/test/dpct/allocator_syclcompat.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// UNSUPPORTED: cuda-8.0, cuda-12.0, cuda-12.1, cuda-12.2, cuda-12.3, cuda-12.4, cuda-12.5, cuda-12.6
// UNSUPPORTED: v8.0, v12.0, v12.1, v12.2, v12.3, v12.4, v12.5, v12.6
// RUN: dpct --format-range=none --use-syclcompat -out-root %T/allocator_syclcompat %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only
// RUN: FileCheck --match-full-lines --input-file %T/allocator_syclcompat/allocator_syclcompat.dp.cpp %s
// RUN: %if build_lit %{icpx -c -fsycl -DBUILD_TEST %T/allocator_syclcompat/allocator_syclcompat.dp.cpp -o %T/allocator_syclcompat/allocator_syclcompat.dp.o %}

#include <algorithm>
#include <cuda_runtime.h>
#include <iostream>
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/device_allocator.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#include <thrust/transform.h>
#include <vector>

#define SIZE 4

template<class T>
int foo() {
#ifndef BUILD_TEST
// CHECK: DPCT1131:{{[0-9]+}}: The migration of "thrust::system::cuda::experimental::pinned_allocator" is not currently supported with SYCLcompat. Please adjust the code manually.
std::vector<float, thrust::system::cuda::experimental::pinned_allocator<float>> hVec(SIZE);

// CHECK: DPCT1131:{{[0-9]+}}: The migration of "thrust::system::cuda::experimental::pinned_allocator" is not currently supported with SYCLcompat. Please adjust the code manually.
std::vector<float, thrust::cuda::experimental::pinned_allocator<float>> hVecCopy = hVec;

// CHECK: DPCT1131:{{[0-9]+}}: The migration of "thrust::device_allocator" is not currently supported with SYCLcompat. Please adjust the code manually.
thrust::device_vector<T, thrust::device_allocator<T>> dvec;
#endif

return 0;
}

template int foo<int>();
13 changes: 13 additions & 0 deletions clang/test/dpct/codepin/link/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// RUN: %if build_lit %{ icpx -fsycl -c %s -o %T/test.o %}
// RUN: %if build_lit %{ icpx -fsycl -c %S/test2.cpp -o %T/test2.o %}
// RUN: %if build_lit %{ icpx -fsycl %T/test.o %T/test2.o %}

#include <dpct/codepin/serialization/basic.hpp>
#include <sycl/sycl.hpp>
void test() {
dpct::experimental::codepin::detail::demangle_name<sycl::half>();
}

int main() {
return 0;
}
7 changes: 7 additions & 0 deletions clang/test/dpct/codepin/link/test2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// RUN: echo 0
#include <dpct/codepin/serialization/basic.hpp>
#include <sycl/sycl.hpp>

void test2() {
dpct::experimental::codepin::detail::demangle_name<sycl::half>();
}
3 changes: 3 additions & 0 deletions clang/test/dpct/group_local_memory.cu
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,6 @@ void bar(int *pd, int len) {
int shareSz = 1024;
foo<<<32, 8, shareSz>>>(pd, len);
}

// CHECK: void f(sycl::uint4 x) { }
__global__ void f() { static __constant__ uint4 x = {1, 2, 3, 4}; }
82 changes: 82 additions & 0 deletions clang/test/dpct/math/cuda-math-syclcompat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
__global__ void kernelFuncBfloat162Arithmetic() {
__nv_bfloat16 bf16, bf16_1, bf16_2, bf16_3;
__nv_bfloat162 bf162, bf162_1, bf162_2, bf162_3;
unsigned u, u_1, u_2;
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__hadd2_sat" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
Expand Down Expand Up @@ -105,5 +106,86 @@ __global__ void kernelFuncBfloat162Arithmetic() {
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__hcmadd" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
h2_2 = __hcmadd(h2, h2_1, h2_2);

// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpeq2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpeq2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpeq4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpeq4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpges2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpges2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpges4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpges4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpgeu2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpgeu2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpgeu4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpgeu4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpgts2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpgts2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpgts4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpgts4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpgtu2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpgtu2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpgtu4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpgtu4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmples2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmples2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmples4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmples4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpleu2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpleu2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpleu4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpleu4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmplts2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmplts2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmplts4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmplts4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpltu2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpltu2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpltu4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpltu4(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpne2" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpne2(u, u_1);
// CHECK: /*
// CHECK-NEXT: DPCT1131:{{[0-9]+}}: The migration of "__vcmpne4" is not currently supported with SYCLcompat. Please adjust the code manually.
// CHECK-NEXT: */
u_2 = __vcmpne4(u, u_1);
}
#endif
Loading

0 comments on commit 30a1773

Please sign in to comment.