diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index e6ba71d53e92b..167e613816394 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -215,8 +215,10 @@ class SemanticsContext { void UseFortranBuiltinsModule(); const Scope *GetBuiltinsScope() const { return builtinsScope_; } - void UsePPCBuiltinTypesModule(); const Scope &GetCUDABuiltinsScope(); + const Scope &GetCUDADeviceScope(); + + void UsePPCBuiltinTypesModule(); void UsePPCBuiltinsModule(); Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; } const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; } @@ -292,6 +294,7 @@ class SemanticsContext { const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types std::optional cudaBuiltinsScope_; // module __CUDA_builtins + std::optional cudaDeviceScope_; // module cudadevice const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics std::list modFileParseTrees_; std::unique_ptr commonBlockMap_; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index e2875081b732c..d2503a053e669 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -3797,6 +3797,26 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) { subp->set_cudaSubprogramAttrs(attr); } } + if (auto attrs{subp->cudaSubprogramAttrs()}) { + if (*attrs == common::CUDASubprogramAttrs::Global || + *attrs == common::CUDASubprogramAttrs::Device) { + const Scope &scope{currScope()}; + const Scope *mod{FindModuleContaining(scope)}; + if (mod && mod->GetName().value() == "cudadevice") { + return false; + } + // Implicitly USE the cudadevice module by copying its symbols in the + // current scope. + const Scope &cudaDeviceScope{context().GetCUDADeviceScope()}; + for (auto sym : cudaDeviceScope.GetSymbols()) { + if (!currScope().FindSymbol(sym->name())) { + auto &localSymbol{MakeSymbol( + sym->name(), Attrs{}, UseDetails{sym->name(), *sym})}; + localSymbol.flags() = sym->flags(); + } + } + } + } } return false; } diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index 6ccd915c4dcbf..d51cc62d804e8 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() { return **cudaBuiltinsScope_; } +const Scope &SemanticsContext::GetCUDADeviceScope() { + if (!cudaDeviceScope_) { + cudaDeviceScope_ = GetBuiltinModule("cudadevice"); + CHECK(cudaDeviceScope_.value() != nullptr); + } + return **cudaDeviceScope_; +} + void SemanticsContext::UsePPCBuiltinsModule() { if (ppcBuiltinsScope_ == nullptr) { ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics"); diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 new file mode 100644 index 0000000000000..f34820dd10792 --- /dev/null +++ b/flang/module/cudadevice.f90 @@ -0,0 +1,74 @@ +!===-- module/cudedevice.f90 -----------------------------------------------===! +! +! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +! See https://llvm.org/LICENSE.txt for license information. +! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +! +!===------------------------------------------------------------------------===! + +! CUDA Fortran procedures available in device subprogram + +module cudadevice +implicit none + + ! Set PRIVATE by default to explicitly only export what is meant + ! to be exported by this MODULE. + private + + ! Synchronization Functions + + interface + attributes(device) subroutine syncthreads() + end subroutine + end interface + public :: syncthreads + + interface + attributes(device) integer function syncthreads_and(value) + integer :: value + end function + end interface + public :: syncthreads_and + + interface + attributes(device) integer function syncthreads_count(value) + integer :: value + end function + end interface + public :: syncthreads_count + + interface + attributes(device) integer function syncthreads_or(value) + integer :: value + end function + end interface + public :: syncthreads_or + + interface + attributes(device) subroutine syncwarp(mask) + integer :: mask + end subroutine + end interface + public :: syncwarp + + ! Memory Fences + + interface + attributes(device) subroutine threadfence() + end subroutine + end interface + public :: threadfence + + interface + attributes(device) subroutine threadfence_block() + end subroutine + end interface + public :: threadfence_block + + interface + attributes(device) subroutine threadfence_system() + end subroutine + end interface + public :: threadfence_system + +end module diff --git a/flang/test/Semantics/cuf-device-procedures01.cuf b/flang/test/Semantics/cuf-device-procedures01.cuf new file mode 100644 index 0000000000000..b9918d8a4ae4c --- /dev/null +++ b/flang/test/Semantics/cuf-device-procedures01.cuf @@ -0,0 +1,35 @@ +! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s + +! Test CUDA Fortran intrinsic can pass semantic + +attributes(global) subroutine devsub() + implicit none + integer :: ret + + ! 3.6.4. Synchronization Functions + call syncthreads() + call syncwarp(1) + call threadfence() + call threadfence_block() + call threadfence_system() + ret = syncthreads_and(1) + ret = syncthreads_count(1) + ret = syncthreads_or(1) +end + +! CHECK-LABEL: Subprogram scope: devsub +! CHECK: syncthreads (Subroutine): Use from syncthreads in cudadevice +! CHECK: syncthreads_and (Function): Use from syncthreads_and in cudadevice +! CHECK: syncthreads_count (Function): Use from syncthreads_count in cudadevice +! CHECK: syncthreads_or (Function): Use from syncthreads_or in cudadevice +! CHECK: syncwarp (Subroutine): Use from syncwarp in cudadevice +! CHECK: threadfence (Subroutine): Use from threadfence in cudadevice +! CHECK: threadfence_block (Subroutine): Use from threadfence_block in cudadevice +! CHECK: threadfence_system (Subroutine): Use from threadfence_system in cudadevice + +subroutine host() + call syncthreads() +end subroutine + +! CHECK-LABEL: Subprogram scope: host +! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}} diff --git a/flang/test/Semantics/cuf-device-procedures02.cuf b/flang/test/Semantics/cuf-device-procedures02.cuf new file mode 100644 index 0000000000000..c93fc4033b8f0 --- /dev/null +++ b/flang/test/Semantics/cuf-device-procedures02.cuf @@ -0,0 +1,17 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module dev + integer, device :: syncthreads + +contains + + attributes(device) subroutine sub1() + syncthreads = 1 ! syncthreads not overwritten by cudadevice + end subroutine + + attributes(global) subroutine sub2() +!ERROR: 'threadfence' is use-associated from module 'cudadevice' and cannot be re-declared + integer :: threadfence + end subroutine +end module + diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 64815a1f5da62..35e1cdafd3ae3 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -12,6 +12,7 @@ set(MODULES "__ppc_intrinsics" "mma" "__cuda_builtins" + "cudadevice" "ieee_arithmetic" "ieee_exceptions" "ieee_features" @@ -26,11 +27,15 @@ set(MODULES if (NOT CMAKE_CROSSCOMPILING) foreach(filename ${MODULES}) set(depends "") + set(opts "") if(${filename} STREQUAL "__fortran_builtins" OR ${filename} STREQUAL "__ppc_types") elseif(${filename} STREQUAL "__ppc_intrinsics" OR ${filename} STREQUAL "mma") set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod) + elseif(${filename} STREQUAL "cudadevice") + set(opts -fc1 -xcuda) + set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod) else() set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod) if(NOT ${filename} STREQUAL "__fortran_type_info") @@ -43,9 +48,8 @@ if (NOT CMAKE_CROSSCOMPILING) endif() # The module contains PPC vector types that needs the PPC target. - set(opts "") - if(${filename} STREQUAL "__ppc_intrinsics" OR - ${filename} STREQUAL "mma") + if(${filename} STREQUAL "__ppc_intrinsics" OR + ${filename} STREQUAL "mma") if (PowerPC IN_LIST LLVM_TARGETS_TO_BUILD) set(opts "--target=ppc64le") else() @@ -58,7 +62,7 @@ if (NOT CMAKE_CROSSCOMPILING) # TODO: We may need to flag this with conditional, in case Flang is built w/o OpenMP support add_custom_command(OUTPUT ${base}.mod COMMAND ${CMAKE_COMMAND} -E make_directory ${FLANG_INTRINSIC_MODULES_DIR} - COMMAND flang-new -cpp -fsyntax-only ${opts} -module-dir ${FLANG_INTRINSIC_MODULES_DIR} + COMMAND flang-new ${opts} -cpp -fsyntax-only -module-dir ${FLANG_INTRINSIC_MODULES_DIR} ${FLANG_SOURCE_DIR}/module/${filename}.f90 DEPENDS flang-new ${FLANG_SOURCE_DIR}/module/${filename}.f90 ${FLANG_SOURCE_DIR}/module/__fortran_builtins.f90 ${depends} )