-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][cuda] Implicitly load cudadevice module in device/global subprogram #91668
Conversation
@llvm/pr-subscribers-flang-semantics Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: Some functions and subroutines are available in device context (device/global). These functions have interfaces declared in the `cudadevice` module. This patch adds interfaces as `__cuda_device_builtins_<fctname>` in a builtin module, and they are USE'd with renaming in the `cudadevice` module. The module is implicitly used in device/global subprograms. The builtin module only contains procedures from section 3.6.4 for now. Full diff: https://github.com/llvm/llvm-project/pull/91668.diff 8 Files Affected:
diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h
index e6ba71d53e92b..367c9224df974 100644
--- a/flang/include/flang/Semantics/semantics.h
+++ b/flang/include/flang/Semantics/semantics.h
@@ -214,9 +214,11 @@ class SemanticsContext {
// Defines builtinsScope_ from the __Fortran_builtins module
void UseFortranBuiltinsModule();
const Scope *GetBuiltinsScope() const { return builtinsScope_; }
+
+ const Scope &GetCUDABuiltinsScope();
+ const Scope &GetCUDADeviceScope();
void UsePPCBuiltinTypesModule();
- const Scope &GetCUDABuiltinsScope();
void UsePPCBuiltinsModule();
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
+ std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
std::list<parser::Program> modFileParseTrees_;
std::unique_ptr<CommonBlockMap> commonBlockMap_;
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 96ab902392633..6c32db4dbd1b3 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -82,6 +82,10 @@ struct DeviceExprChecker
}
}
}
+ if (sym->owner().IsModule() &&
+ DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
+ return {};
+ }
} else if (x.GetSpecificIntrinsic()) {
// TODO(CUDA): Check for unsupported intrinsics here
return {};
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 61394b0f41de7..16c555ff668aa 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -3797,6 +3797,17 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
subp->set_cudaSubprogramAttrs(attr);
}
}
+ if (auto attrs{subp->cudaSubprogramAttrs()}) {
+ if (*attrs == common::CUDASubprogramAttrs::Global ||
+ *attrs == common::CUDASubprogramAttrs::Device) {
+ // Implicitly USE the cudadevice module by copying its symbol in the
+ // current scope.
+ const Scope &scope{context().GetCUDADeviceScope()};
+ for (auto sym : scope.GetSymbols()) {
+ currScope().CopySymbol(sym);
+ }
+ }
+ }
}
return false;
}
diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp
index 6ccd915c4dcbf..d51cc62d804e8 100644
--- a/flang/lib/Semantics/semantics.cpp
+++ b/flang/lib/Semantics/semantics.cpp
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
return **cudaBuiltinsScope_;
}
+const Scope &SemanticsContext::GetCUDADeviceScope() {
+ if (!cudaDeviceScope_) {
+ cudaDeviceScope_ = GetBuiltinModule("cudadevice");
+ CHECK(cudaDeviceScope_.value() != nullptr);
+ }
+ return **cudaDeviceScope_;
+}
+
void SemanticsContext::UsePPCBuiltinsModule() {
if (ppcBuiltinsScope_ == nullptr) {
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
diff --git a/flang/module/__cuda_device_builtins.f90 b/flang/module/__cuda_device_builtins.f90
new file mode 100644
index 0000000000000..738dc97242f2b
--- /dev/null
+++ b/flang/module/__cuda_device_builtins.f90
@@ -0,0 +1,74 @@
+!===-- module/__cuda_device_builtins.f90 -----------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprogram
+
+module __CUDA_device_builtins
+
+ implicit none
+
+ ! Set PRIVATE by default to explicitly only export what is meant
+ ! to be exported by this MODULE.
+ private
+
+ ! Synchronization Functions
+
+ interface
+ subroutine __cuda_device_builtins_syncthreads()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_syncthreads
+
+ interface
+ integer function __cuda_device_builtins_syncthreads_and(value)
+ integer :: value
+ end function
+ end interface
+ public :: __cuda_device_builtins_syncthreads_and
+
+ interface
+ integer function __cuda_device_builtins_syncthreads_count(value)
+ integer :: value
+ end function
+ end interface
+ public :: __cuda_device_builtins_syncthreads_count
+
+ interface
+ integer function __cuda_device_builtins_syncthreads_or(int_value)
+ end function
+ end interface
+ public :: __cuda_device_builtins_syncthreads_or
+
+ interface
+ subroutine __cuda_device_builtins_syncwarp(mask)
+ integer :: mask
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_syncwarp
+
+ ! Memory Fences
+
+ interface
+ subroutine __cuda_device_builtins_threadfence()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_threadfence
+
+ interface
+ subroutine __cuda_device_builtins_threadfence_block()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_threadfence_block
+
+ interface
+ subroutine __cuda_device_builtins_threadfence_system()
+ end subroutine
+ end interface
+ public :: __cuda_device_builtins_threadfence_system
+
+end module
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
new file mode 100644
index 0000000000000..b635d77ea4529
--- /dev/null
+++ b/flang/module/cudadevice.f90
@@ -0,0 +1,21 @@
+!===-- module/cudadevice.f90 -----------------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+! CUDA Fortran procedures available in device subprogram
+
+module cudadevice
+ use __cuda_device_builtins, only: &
+ syncthreads => __cuda_device_builtins_syncthreads, &
+ syncthreads_and => __cuda_device_builtins_syncthreads_and, &
+ syncthreads_count => __cuda_device_builtins_syncthreads_count, &
+ syncthreads_or => __cuda_device_builtins_syncthreads_or, &
+ syncwarp => __cuda_device_builtins_syncwarp, &
+ threadfence => __cuda_device_builtins_threadfence, &
+ threadfence_block => __cuda_device_builtins_threadfence_block, &
+ threadfence_system => __cuda_device_builtins_threadfence_system
+end module
diff --git a/flang/test/Semantics/cuf-device-procedures.cuf b/flang/test/Semantics/cuf-device-procedures.cuf
new file mode 100644
index 0000000000000..e79423e3587a1
--- /dev/null
+++ b/flang/test/Semantics/cuf-device-procedures.cuf
@@ -0,0 +1,35 @@
+! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
+
+! Test CUDA Fortran intrinsic can pass semantic
+
+attributes(global) subroutine devsub()
+ implicit none
+ integer :: ret
+
+ ! 3.6.4. Synchronization Functions
+ call syncthreads()
+ call syncwarp(1)
+ call threadfence()
+ call threadfence_block()
+ call threadfence_system()
+ ret = syncthreads_and(1)
+ ret = syncthreads_count(1)
+ ret = syncthreads_or(1)
+end
+
+! CHECK-LABEL: Subprogram scope: devsub
+! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins
+! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins
+! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins
+! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins
+! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins
+! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins
+! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins
+! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins
+
+subroutine host()
+ call syncthreads()
+end subroutine
+
+! CHECK-LABEL: Subprogram scope: host
+! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 64815a1f5da62..e5cf945d1f118 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -12,6 +12,8 @@ set(MODULES
"__ppc_intrinsics"
"mma"
"__cuda_builtins"
+ "__cuda_device_builtins"
+ "cudadevice"
"ieee_arithmetic"
"ieee_exceptions"
"ieee_features"
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
if (*attrs == common::CUDASubprogramAttrs::Global || | ||
*attrs == common::CUDASubprogramAttrs::Device) { | ||
// Implicitly USE the cudadevice module by copying its symbol in the | ||
// current scope. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/symbol/symbols/
What about clashes with names that are already in scope (or declared later)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to the reference compiler, we should not overwrite what is already in scope, so I updated the symbol copy and added a test.
For names declared afterwards in a device or global procedure, we will emit an error message. I also added a test for this case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the semantics of these names are not really what you get from either a USE statement or from intrinsics.
There is already precedent here with names like `threadIdx` that are automatically imported into device subprogram scopes, so I guess this isn't making things any worse.
if (*attrs == common::CUDASubprogramAttrs::Global || | ||
*attrs == common::CUDASubprogramAttrs::Device) { | ||
// Implicitly USE the cudadevice module by copying its symbol in the | ||
// current scope. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So the semantics of these names are not really what you get from either a USE statement or from intrinsics.
There is already precedent here with names like `threadIdx` that are automatically imported into device subprogram scopes, so I guess this isn't making things any worse.
@@ -0,0 +1,74 @@ | |||
!===-- module/__cuda_device_builtins.f90 -----------------------------------===! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These could all be in `__fortran_builtins`, which already contains some built-in CUDA definitions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason I added a new file is that we can check on the module name alone, instead of having to check on both the module name and the procedure prefix.
I could move them into `__fortran_builtins`, but we would then also need to check whether the procedure name starts with `__cuda_device_builtins_`.
Some functions and subroutines are available in device context (device/global). These functions have interfaces declared in the `cudadevice` module.
This patch adds interfaces as `__cuda_device_builtins_<fctname>` in a builtin module, and they are USE'd with renaming in the `cudadevice` module. The module is implicitly used in device/global subprograms.
The builtin module only contains procedures from section 3.6.4 for now.