Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flang][cuda] Implicitly load cudadevice module in device/global subprogram #91668

Merged
merged 4 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion flang/include/flang/Semantics/semantics.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,11 @@ class SemanticsContext {
// Defines builtinsScope_ from the __Fortran_builtins module
void UseFortranBuiltinsModule();
const Scope *GetBuiltinsScope() const { return builtinsScope_; }

// Scope of module __CUDA_builtins.
const Scope &GetCUDABuiltinsScope();
// Scope of module cudadevice. The module is looked up through
// GetBuiltinModule on the first call and the result is cached in
// cudaDeviceScope_ for later calls.
const Scope &GetCUDADeviceScope();

void UsePPCBuiltinTypesModule();
const Scope &GetCUDABuiltinsScope();
void UsePPCBuiltinsModule();
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
Expand Down Expand Up @@ -292,6 +294,7 @@ class SemanticsContext {
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
std::list<parser::Program> modFileParseTrees_;
std::unique_ptr<CommonBlockMap> commonBlockMap_;
Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Semantics/check-cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ struct DeviceExprChecker
}
}
}
if (sym->owner().IsModule() &&
DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
clementval marked this conversation as resolved.
Show resolved Hide resolved
return {};
}
} else if (x.GetSpecificIntrinsic()) {
// TODO(CUDA): Check for unsupported intrinsics here
return {};
Expand Down
11 changes: 11 additions & 0 deletions flang/lib/Semantics/resolve-names.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3797,6 +3797,17 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
subp->set_cudaSubprogramAttrs(attr);
}
}
if (auto attrs{subp->cudaSubprogramAttrs()}) {
if (*attrs == common::CUDASubprogramAttrs::Global ||
*attrs == common::CUDASubprogramAttrs::Device) {
// Implicitly USE the cudadevice module by copying its symbols into the
// current scope.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/symbol/symbols/

What about clashes with names that are already in scope (or declared later)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the reference compiler, we should not overwrite what is already in scope, so I updated the symbol copying accordingly and added a test.
For names declared afterwards in a device or global procedure, an error message is emitted. I also added a test for this case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the semantics of these names are not really what you get from either a USE statement or from intrinsics.

There is already precedent here with names like threadIdx that are automatically imported into device subprogram scopes, so I guess this isn't making things any worse.

const Scope &scope{context().GetCUDADeviceScope()};
for (auto sym : scope.GetSymbols()) {
currScope().CopySymbol(sym);
}
}
}
}
return false;
}
Expand Down
8 changes: 8 additions & 0 deletions flang/lib/Semantics/semantics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
return **cudaBuiltinsScope_;
}

// Returns the scope of the builtin module "cudadevice".
// The module is resolved through GetBuiltinModule the first time this is
// called; the resulting pointer is stored in cudaDeviceScope_ and reused on
// every later call. A null result from the lookup trips the CHECK.
const Scope &SemanticsContext::GetCUDADeviceScope() {
  if (cudaDeviceScope_.has_value()) {
    // Already resolved by an earlier call.
    return **cudaDeviceScope_;
  }
  cudaDeviceScope_ = GetBuiltinModule("cudadevice");
  CHECK(cudaDeviceScope_.value() != nullptr);
  return **cudaDeviceScope_;
}

void SemanticsContext::UsePPCBuiltinsModule() {
if (ppcBuiltinsScope_ == nullptr) {
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
Expand Down
74 changes: 74 additions & 0 deletions flang/module/__cuda_device_builtins.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
!===-- module/__cuda_device_builtins.f90 -----------------------------------===!
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These could all be in __fortran_builtins, which already contains some built-in CUDA definitions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason I added a new file is that we can check on the module name alone instead of having to check on both the module name and the procedure prefix.
I could move them into __fortran_builtins, but we would then also need to check whether the procedure names start with __cuda_device_builtins_.

!
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
! See https://llvm.org/LICENSE.txt for license information.
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
!
!===------------------------------------------------------------------------===!

! CUDA Fortran procedures available in device subprogram

! Declares the interfaces of the CUDA Fortran device procedures under
! prefixed names (__cuda_device_builtins_*). Only interfaces are given here;
! no procedure bodies are defined in this module. Module cudadevice makes
! these procedures available under their unprefixed names through USE
! renaming.
module __CUDA_device_builtins

  implicit none

  ! Set PRIVATE by default to explicitly only export what is meant
  ! to be exported by this MODULE.
  private

  ! Synchronization Functions

  interface
    subroutine __cuda_device_builtins_syncthreads()
    end subroutine
  end interface
  public :: __cuda_device_builtins_syncthreads

  interface
    integer function __cuda_device_builtins_syncthreads_and(value)
      integer :: value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_and

  interface
    integer function __cuda_device_builtins_syncthreads_count(value)
      integer :: value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_count

  interface
    integer function __cuda_device_builtins_syncthreads_or(int_value)
      ! Declared explicitly, like the dummies of the sibling interfaces.
      ! The module-level IMPLICIT NONE does not extend into an interface
      ! body, so without this line the dummy is typed by implicit rules.
      integer :: int_value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_or

  interface
    subroutine __cuda_device_builtins_syncwarp(mask)
      integer :: mask
    end subroutine
  end interface
  public :: __cuda_device_builtins_syncwarp

  ! Memory Fences

  interface
    subroutine __cuda_device_builtins_threadfence()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence

  interface
    subroutine __cuda_device_builtins_threadfence_block()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence_block

  interface
    subroutine __cuda_device_builtins_threadfence_system()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence_system

end module
21 changes: 21 additions & 0 deletions flang/module/cudadevice.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
!===-- module/cudadevice.f90 -----------------------------------------------===!
klausler marked this conversation as resolved.
Show resolved Hide resolved
!
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
! See https://llvm.org/LICENSE.txt for license information.
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
!
!===------------------------------------------------------------------------===!

! CUDA Fortran procedures available in device subprogram

! User-facing module for the CUDA Fortran device procedures.
! It contains no declarations of its own: each entry below re-exports an
! interface from __cuda_device_builtins, renaming the prefixed name
! (__cuda_device_builtins_<name>) to the plain <name>.
module cudadevice
use __cuda_device_builtins, only: &
syncthreads => __cuda_device_builtins_syncthreads, &
syncthreads_and => __cuda_device_builtins_syncthreads_and, &
syncthreads_count => __cuda_device_builtins_syncthreads_count, &
syncthreads_or => __cuda_device_builtins_syncthreads_or, &
syncwarp => __cuda_device_builtins_syncwarp, &
threadfence => __cuda_device_builtins_threadfence, &
threadfence_block => __cuda_device_builtins_threadfence_block, &
threadfence_system => __cuda_device_builtins_threadfence_system
end module
35 changes: 35 additions & 0 deletions flang/test/Semantics/cuf-device-procedures.cuf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s

! Test that CUDA Fortran device procedures pass semantic analysis

! Global subprogram that references every procedure exported by module
! cudadevice without any USE statement. The CHECK lines for this scope
! expect each name to appear as a "Use from ... in __cuda_device_builtins"
! symbol.
attributes(global) subroutine devsub()
implicit none
integer :: ret

! 3.6.4. Synchronization Functions
call syncthreads()
call syncwarp(1)
! Memory fence subroutines
call threadfence()
call threadfence_block()
call threadfence_system()
! Synchronization functions that return an integer result
ret = syncthreads_and(1)
ret = syncthreads_count(1)
ret = syncthreads_or(1)
end

! CHECK-LABEL: Subprogram scope: devsub
! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins
! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins
! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins
! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins
! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins
! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins
! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins
! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins

! Subprogram without CUDA attributes, used as the negative case: the CHECK
! line for this scope expects syncthreads to be a plain EXTERNAL symbol
! rather than a "Use from" symbol coming from __cuda_device_builtins.
subroutine host()
call syncthreads()
end subroutine

! CHECK-LABEL: Subprogram scope: host
! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
2 changes: 2 additions & 0 deletions flang/tools/f18/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ set(MODULES
"__ppc_intrinsics"
"mma"
"__cuda_builtins"
"__cuda_device_builtins"
"cudadevice"
"ieee_arithmetic"
"ieee_exceptions"
"ieee_features"
Expand Down
Loading