Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CUDA] Add support for CUDA-12.3 and sm_90a #74895

Merged
merged 3 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -973,6 +973,9 @@ CUDA/HIP Language Changes
CUDA Support
^^^^^^^^^^^^

- Clang now supports CUDA SDK up to 12.3
- Added support for sm_90a

AIX Support
^^^^^^^^^^^

Expand Down
13 changes: 11 additions & 2 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#define SM_90 "sm_90"
#pragma push_macro("SM_90a")
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
Expand Down Expand Up @@ -56,7 +58,11 @@
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
#define PTX81 "ptx81"
#pragma push_macro("PTX82")
#pragma push_macro("PTX83")
#define PTX83 "ptx83"
#define PTX82 "ptx82|" PTX83
#define PTX81 "ptx81|" PTX82
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
Expand Down Expand Up @@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_87")
#pragma pop_macro("SM_89")
#pragma pop_macro("SM_90")
#pragma pop_macro("SM_90a")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
Expand All @@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("PTX78")
#pragma pop_macro("PTX80")
#pragma pop_macro("PTX81")
#pragma pop_macro("PTX82")
#pragma pop_macro("PTX83")
7 changes: 5 additions & 2 deletions clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,11 @@ enum class CudaVersion {
CUDA_118,
CUDA_120,
CUDA_121,
FULLY_SUPPORTED = CUDA_118,
CUDA_122,
CUDA_123,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_121, // Partially supported. Proceed with a warning.
CUDA_123, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
Expand Down Expand Up @@ -71,6 +73,7 @@ enum class CudaArch {
SM_87,
SM_89,
SM_90,
SM_90a,
GFX600,
GFX601,
GFX602,
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(11, 8),
CUDA_ENTRY(12, 0),
CUDA_ENTRY(12, 1),
CUDA_ENTRY(12, 2),
CUDA_ENTRY(12, 3),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down Expand Up @@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
SM(87), // Jetson/Drive AGX Orin
SM(89), // Ada Lovelace
SM(90), // Hopper
SM(90a), // Hopper
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
Expand Down Expand Up @@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
case CudaArch::SM_89:
case CudaArch::SM_90:
return CudaVersion::CUDA_118;
case CudaArch::SM_90a:
return CudaVersion::CUDA_120;
default:
llvm_unreachable("invalid enum");
}
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::SM_89:
return "890";
case CudaArch::SM_90:
case CudaArch::SM_90a:
return "900";
}
llvm_unreachable("unhandled CudaArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
if (GPU == CudaArch::SM_90a)
Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
}
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3483,6 +3483,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::SM_87:
case CudaArch::SM_89:
case CudaArch::SM_90:
case CudaArch::SM_90a:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_120;
if (raw_version < 12020)
return CudaVersion::CUDA_121;
if (raw_version < 12030)
return CudaVersion::CUDA_122;
if (raw_version < 12040)
return CudaVersion::CUDA_123;
return CudaVersion::NEW;
}

Expand Down Expand Up @@ -671,6 +675,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
CASE_CUDA_VERSION(123, 83);
CASE_CUDA_VERSION(122, 82);
CASE_CUDA_VERSION(121, 81);
CASE_CUDA_VERSION(120, 80);
CASE_CUDA_VERSION(118, 78);
Expand Down
2 changes: 1 addition & 1 deletion clang/test/Misc/target-invalid-cpu-note.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}

// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
Expand Down
19 changes: 10 additions & 9 deletions llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td"
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//

class FeatureSM<int version>:
SubtargetFeature<"sm_"# version, "SmVersion",
"" # version,
"Target SM " # version>;
def SM90a: FeatureSM<90>;
class FeatureSM<string sm, int value>:
SubtargetFeature<"sm_"# sm, "FullSmVersion",
"" # value,
"Target SM " # sm>;

class FeaturePTX<int version>:
SubtargetFeature<"ptx"# version, "PTXVersion",
"" # version,
"Use PTX version " # version>;

foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
def SM#version: FeatureSM<version>;
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;

def SM90a: FeatureSM<"90a", 901>;

foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in
70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in
def PTX#version: FeaturePTX<version>;

//===----------------------------------------------------------------------===//
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,

ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);

// Re-map SM version numbers, SmVersion carries the regular SMs which do
// have relative order, while FullSmVersion allows distinguishing sm_90 from
// sm_90a, which would *not* be a subset of sm_91.
SmVersion = getSmVersion();

// Set default to PTX 6.0 (CUDA 9.0)
if (PTXVersion == 0) {
PTXVersion = 60;
Expand All @@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
SmVersion(20), TM(TM),
FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}

bool NVPTXSubtarget::hasImageHandles() const {
Expand Down
17 changes: 15 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;

// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
// Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
// sm_90a == 901
unsigned int FullSmVersion;

// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
// FullSmVersion.
unsigned int SmVersion;

const NVPTXTargetMachine &TM;
Expand Down Expand Up @@ -80,7 +85,15 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool allowFP16Math() const;
bool hasMaskOperator() const { return PTXVersion >= 71; }
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
unsigned int getSmVersion() const { return SmVersion; }
unsigned int getFullSmVersion() const { return FullSmVersion; }
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
// GPUs with "a" suffix have include architecture-accelerated features that
// are supported on the specified architecture only, hence such targets do not
// follow the onion layer model. hasAAFeatures() allows distinguishing such
// GPU variants from the base GPU architecture.
// - 0 represents base GPU model,
// - non-zero value identifies particular architecture-accelerated variant.
bool hasAAFeatures() const { return getFullSmVersion() % 10; }
std::string getTargetName() const { return TargetName; }

// Get maximum value of required alignments among the supported data types.
Expand Down