Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CUDA] Add support for CUDA-12.6 and sm_100 #112028

Merged
merged 2 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,8 @@ CUDA/HIP Language Changes

CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.6
- Added support for sm_100

AIX Support
^^^^^^^^^^^
Expand Down
10 changes: 8 additions & 2 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
#pragma push_macro("SM_100")
#define SM_100 "sm_100"
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_90 "sm_90|" SM_90a "|" SM_100
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
Expand Down Expand Up @@ -63,7 +65,9 @@
#pragma push_macro("PTX83")
#pragma push_macro("PTX84")
#pragma push_macro("PTX85")
#define PTX85 "ptx85"
#pragma push_macro("PTX86")
#define PTX86 "ptx86"
#define PTX85 "ptx85|" PTX86
#define PTX84 "ptx84|" PTX85
#define PTX83 "ptx83|" PTX84
#define PTX82 "ptx82|" PTX83
Expand Down Expand Up @@ -1086,6 +1090,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_89")
#pragma pop_macro("SM_90")
#pragma pop_macro("SM_90a")
#pragma pop_macro("SM_100")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
Expand All @@ -1108,3 +1113,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("PTX83")
#pragma pop_macro("PTX84")
#pragma pop_macro("PTX85")
#pragma pop_macro("PTX86")
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ enum class CudaVersion {
CUDA_123,
CUDA_124,
CUDA_125,
CUDA_126,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_125, // Partially supported. Proceed with a warning.
CUDA_126, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
Expand Down Expand Up @@ -78,6 +79,7 @@ enum class OffloadArch {
SM_89,
SM_90,
SM_90a,
SM_100,
GFX600,
GFX601,
GFX602,
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 3),
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down Expand Up @@ -96,6 +97,7 @@ static const OffloadArchToStringMap arch_names[] = {
SM(89), // Ada Lovelace
SM(90), // Hopper
SM(90a), // Hopper
SM(100), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
Expand Down Expand Up @@ -221,6 +223,9 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
return CudaVersion::CUDA_118;
case OffloadArch::SM_90a:
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
// public.
default:
llvm_unreachable("invalid enum");
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
return "900";
case OffloadArch::SM_100:
return "1000";
}
llvm_unreachable("unhandled OffloadArch");
}();
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_89:
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_124;
if (raw_version < 12060)
return CudaVersion::CUDA_125;
if (raw_version < 12070)
return CudaVersion::CUDA_126;
return CudaVersion::NEW;
}

Expand Down Expand Up @@ -669,6 +671,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
CASE_CUDA_VERSION(126, 85);
CASE_CUDA_VERSION(125, 85);
CASE_CUDA_VERSION(124, 84);
CASE_CUDA_VERSION(123, 83);
Expand All @@ -691,6 +694,10 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
CASE_CUDA_VERSION(91, 61);
CASE_CUDA_VERSION(90, 60);
#undef CASE_CUDA_VERSION
// TODO: Use specific CUDA version once it's public.
case clang::CudaVersion::NEW:
PtxFeature = "+ptx86";
break;
default:
PtxFeature = "+ptx42";
}
Expand Down
1 change: 1 addition & 0 deletions clang/test/Misc/target-invalid-cpu-note/nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
// CHECK-SAME: {{^}}, sm_89
// CHECK-SAME: {{^}}, sm_90
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ class FeaturePTX<int version>:
"Use PTX version " # version>;

foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100] in
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;

def SM90a: FeatureSM<"90a", 901>;

foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
80, 81, 82, 83, 84, 85] in
80, 81, 82, 83, 84, 85, 86] in
def PTX#version: FeaturePTX<version>;

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -73,6 +73,7 @@ def : Proc<"sm_87", [SM87, PTX74]>;
def : Proc<"sm_89", [SM89, PTX78]>;
def : Proc<"sm_90", [SM90, PTX78]>;
def : Proc<"sm_90a", [SM90a, PTX80]>;
def : Proc<"sm_100", [SM100, PTX86]>;

def NVPTXInstrInfo : InstrInfo {
}
Expand Down
Loading