Skip to content

Commit

Permalink
Fix previous commit
Browse files (browse the repository at this point in the history)
  • Loading branch information
wenkaidu committed Dec 3, 2024
1 parent c2473d1 commit bc7f282
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions src/device/op128.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ template<> __device__ __forceinline__ void st_global<0>(uintptr_t addr, BytePack
} \
template<> \
__device__ __forceinline__ void st_##space<bytes>(addr_cxx_ty addr, BytePack<bytes> value) { \
- data_cxx_ty tmp = __builtin_nontemporal_load((data_cxx_ty *)&value.native); \
- __builtin_nontemporal_store(tmp, (data_cxx_ty *)addr); \
+ __builtin_nontemporal_store(value.native, (data_cxx_ty *)addr); \
}

// #if __CUDA_ARCH__ >= 700
Expand Down

0 comments on commit bc7f282

Please sign in to comment.