Skip to content

Commit

Permalink
[CUDA] Add support for CUDA-12.6 and sm_100 (llvm#112028)
Browse files Browse the repository at this point in the history
This is a copy of llvm#97402 (with minor updates), which is now ready to land.

---------

Co-authored-by: Sergey Kozub <skozub@nvidia.com>
  • Loading branch information
2 people authored and EricWF committed Oct 22, 2024
1 parent c0d9cd6 commit a703c64
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 5 deletions.
2 changes: 2 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,8 @@ CUDA/HIP Language Changes

CUDA Support
^^^^^^^^^^^^
- Clang now supports CUDA SDK up to 12.6
- Added support for sm_100

AIX Support
^^^^^^^^^^^
Expand Down
10 changes: 8 additions & 2 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
#pragma push_macro("SM_100")
#define SM_100 "sm_100"
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_90 "sm_90|" SM_90a "|" SM_100
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
Expand Down Expand Up @@ -63,7 +65,9 @@
#pragma push_macro("PTX83")
#pragma push_macro("PTX84")
#pragma push_macro("PTX85")
#define PTX85 "ptx85"
#pragma push_macro("PTX86")
#define PTX86 "ptx86"
#define PTX85 "ptx85|" PTX86
#define PTX84 "ptx84|" PTX85
#define PTX83 "ptx83|" PTX84
#define PTX82 "ptx82|" PTX83
Expand Down Expand Up @@ -1086,6 +1090,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_89")
#pragma pop_macro("SM_90")
#pragma pop_macro("SM_90a")
#pragma pop_macro("SM_100")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
Expand All @@ -1108,3 +1113,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("PTX83")
#pragma pop_macro("PTX84")
#pragma pop_macro("PTX85")
#pragma pop_macro("PTX86")
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,10 @@ enum class CudaVersion {
CUDA_123,
CUDA_124,
CUDA_125,
CUDA_126,
FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
CUDA_125, // Partially supported. Proceed with a warning.
CUDA_126, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
Expand Down Expand Up @@ -78,6 +79,7 @@ enum class OffloadArch {
SM_89,
SM_90,
SM_90a,
SM_100,
GFX600,
GFX601,
GFX602,
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Basic/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(12, 3),
CUDA_ENTRY(12, 4),
CUDA_ENTRY(12, 5),
CUDA_ENTRY(12, 6),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
Expand Down Expand Up @@ -96,6 +97,7 @@ static const OffloadArchToStringMap arch_names[] = {
SM(89), // Ada Lovelace
SM(90), // Hopper
SM(90a), // Hopper
SM(100), // Blackwell
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
Expand Down Expand Up @@ -221,6 +223,9 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
return CudaVersion::CUDA_118;
case OffloadArch::SM_90a:
return CudaVersion::CUDA_120;
case OffloadArch::SM_100:
return CudaVersion::NEW; // TODO: use specific CUDA version once it's
// public.
default:
llvm_unreachable("invalid enum");
}
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets/NVPTX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
return "900";
case OffloadArch::SM_100:
return "1000";
}
llvm_unreachable("unhandled OffloadArch");
}();
Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
case OffloadArch::SM_89:
case OffloadArch::SM_90:
case OffloadArch::SM_90a:
case OffloadArch::SM_100:
case OffloadArch::GFX600:
case OffloadArch::GFX601:
case OffloadArch::GFX602:
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_124;
if (raw_version < 12060)
return CudaVersion::CUDA_125;
if (raw_version < 12070)
return CudaVersion::CUDA_126;
return CudaVersion::NEW;
}

Expand Down Expand Up @@ -669,6 +671,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
CASE_CUDA_VERSION(126, 85);
CASE_CUDA_VERSION(125, 85);
CASE_CUDA_VERSION(124, 84);
CASE_CUDA_VERSION(123, 83);
Expand All @@ -691,6 +694,10 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
CASE_CUDA_VERSION(91, 61);
CASE_CUDA_VERSION(90, 60);
#undef CASE_CUDA_VERSION
// TODO: Use specific CUDA version once it's public.
case clang::CudaVersion::NEW:
PtxFeature = "+ptx86";
break;
default:
PtxFeature = "+ptx42";
}
Expand Down
1 change: 1 addition & 0 deletions clang/test/Misc/target-invalid-cpu-note/nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
// CHECK-SAME: {{^}}, sm_89
// CHECK-SAME: {{^}}, sm_90
// CHECK-SAME: {{^}}, sm_90a
// CHECK-SAME: {{^}}, sm_100
// CHECK-SAME: {{^}}, gfx600
// CHECK-SAME: {{^}}, gfx601
// CHECK-SAME: {{^}}, gfx602
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ class FeaturePTX<int version>:
"Use PTX version " # version>;

foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100] in
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;

def SM90a: FeatureSM<"90a", 901>;

foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
70, 71, 72, 73, 74, 75, 76, 77, 78,
80, 81, 82, 83, 84, 85] in
80, 81, 82, 83, 84, 85, 86] in
def PTX#version: FeaturePTX<version>;

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -73,6 +73,7 @@ def : Proc<"sm_87", [SM87, PTX74]>;
def : Proc<"sm_89", [SM89, PTX78]>;
def : Proc<"sm_90", [SM90, PTX78]>;
def : Proc<"sm_90a", [SM90a, PTX80]>;
def : Proc<"sm_100", [SM100, PTX86]>;

def NVPTXInstrInfo : InstrInfo {
}
Expand Down

0 comments on commit a703c64

Please sign in to comment.