-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][cuda] Avoid to issue data transfer in device context #90247
[flang][cuda] Avoid to issue data transfer in device context #90247
Conversation
Data transfers should not be issued in a device function.
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: Data transfers should not be issued in a device function. Full diff: https://github.com/llvm/llvm-project/pull/90247.diff 2 Files Affected:
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 19c00884bd1b7e..e3679ef2afbbc4 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3807,16 +3807,30 @@ class FirConverter : public Fortran::lower::AbstractConverter {
return temps;
}
+ static bool isDeviceContext(fir::FirOpBuilder &builder) {
+ if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
+ return true;
+ if (auto funcOp =
+ builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
+ if (auto cudaProcAttr =
+ funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
+ fir::getCUDAAttrName())) {
+ return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host;
+ }
+ }
+ return false;
+ }
+
void genDataAssignment(
const Fortran::evaluate::Assignment &assign,
const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
- bool isInDeviceContext =
- builder.getRegion().getParentOfType<fir::CUDAKernelOp>();
- bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
- Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+ bool isInDeviceContext = isDeviceContext(builder);
+ bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
+ Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
+ !isInDeviceContext;
bool hasCUDAImplicitTransfer =
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
llvm::SmallVector<mlir::Value> implicitTemps;
@@ -3879,7 +3893,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
Fortran::lower::StatementContext localStmtCtx;
hlfir::Entity rhs = evaluateRhs(localStmtCtx);
hlfir::Entity lhs = evaluateLhs(localStmtCtx);
- if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext)
+ if (isCUDATransfer && !hasCUDAImplicitTransfer)
genCUDADataTransfer(builder, loc, assign, lhs, rhs);
else
builder.create<hlfir::AssignOp>(loc, rhs, lhs,
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 70483685d20019..add1052e576c15 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -141,3 +141,13 @@ end subroutine
! CHECK: fir.cuda_kernel<<<*, *>>>
! CHECK-NOT: fir.cuda_data_transfer
! CHECK: hlfir.assign
+
+attributes(global) subroutine sub5(a)
+ integer, device :: a
+ integer :: i
+ i = threadIdx%x + (blockIdx%x - 1) * blockDim%x
+ a = i
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub5
+! CHECK-NOT: fir.cuda_data_transfer
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, Valentin!
Data transfers should not be issued in a device function.