diff --git a/src/device/non_caching_load.h b/src/device/non_caching_load.h index 0b42d91da..184452e0d 100644 --- a/src/device/non_caching_load.h +++ b/src/device/non_caching_load.h @@ -73,37 +73,5 @@ __host__ __device__ T __non_caching_load(const T* p) #undef LD } -// template -// inline -// __attribute__((always_inline)) -// __host__ __device__ void __non_caching_load_128b(const T0 ret, const T1* p0) -// { -// #if !defined(__GFX11__) && !defined(GFX12) -// #define LD5 "global_load_dwordx4" -// #if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) -// #define BITS "sc0 sc1 nt" -// #elif defined(__GFX9__) || defined(__gfx1010__) || defined(__gfx1011__) || defined(__gfx1012__) || defined(__gfx1013__) -// #define BITS "glc slc" -// #else -// #define BITS "glc slc dlc" -// #endif -// #define WAIT ((0 << 14) | (0x3f << 8) | (0x7) << 4) -// #else -// #define LD5 "global_load_b128" -// #define BITS "glc slc dlc" -// #define WAIT ((0 << 10) | (0x3f << 4) | 0x7) -// #endif -// #define LOAD5 LD5 " %0 %1 off " BITS - -// asm volatile(LOAD5 : "=v"(ret) : "v"(p0)); -// __builtin_amdgcn_s_waitcnt(WAIT); - -// return; - -// #undef LOAD5 -// #undef WAIT -// #undef BITS -// #undef LD5 -// } - #endif + diff --git a/tools/non-caching-load/non-caching-load.cpp b/tools/non-caching-load/non-caching-load.cpp index c8261ec27..9eb38fa63 100755 --- a/tools/non-caching-load/non-caching-load.cpp +++ b/tools/non-caching-load/non-caching-load.cpp @@ -48,7 +48,7 @@ __global__ void nonCachingLoad(T* p, T* out){ } template -__global__ void builtinTemp(T* p, T* out){ +__global__ void builtinTemporalLoad(T* p, T* out){ if constexpr (std::is_same::value) p[0] = {22, 22, 22, 22}; else @@ -67,10 +67,8 @@ void caching_load() { hipMalloc(&out1, size); hipMalloc(&out2, size); - // hipMemcpy(&host_data, data, size, hipMemcpyHostToDevice); hipLaunchKernelGGL(nonCachingLoad, dim3(1), dim3(1), 0, 0, data, out1); - - hipLaunchKernelGGL(builtinTemp, dim3(1), dim3(1), 0, 0, data, out2); + hipLaunchKernelGGL(builtinTemporalLoad, dim3(1), dim3(1), 0, 0, data, out2); hipDeviceSynchronize(); @@ -113,4 +111,4 @@ int main(int argc, char **argv) caching_load(); return 0; -} \ No newline at end of file +}