-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CUDA] Add support for CUDA-12.3 and sm_90a #74895
Merged
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
llvmbot
added
clang
Clang issues not falling into any other category
clang:driver
'clang' and 'clang++' user-facing binaries. Not 'clang-cl'
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang:codegen
clang:openmp
OpenMP related changes to Clang
labels
Dec 8, 2023
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-codegen Author: Artem Belevich (Artem-B) ChangesFull diff: https://github.com/llvm/llvm-project/pull/74895.diff 11 Files Affected:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 89ea2f0930cec..1bf68a46a64da 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -937,6 +937,9 @@ CUDA/HIP Language Changes
CUDA Support
^^^^^^^^^^^^
+- Clang now supports CUDA SDK up to 12.3
+- Added support for sm_90a
+
AIX Support
^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def
index d74a7d1e55dd2..0f2e8260143be 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.def
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.def
@@ -26,7 +26,9 @@
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
-#define SM_90 "sm_90"
+#pragma push_macro("SM_90a")
+#define SM_90a "sm_90a"
+#define SM_90 "sm_90|" SM_90a
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
@@ -56,7 +58,11 @@
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
-#define PTX81 "ptx81"
+#pragma push_macro("PTX82")
+#pragma push_macro("PTX83")
+#define PTX83 "ptx83"
+#define PTX82 "ptx82|" PTX83
+#define PTX81 "ptx81|" PTX82
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
@@ -1055,6 +1061,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("SM_87")
#pragma pop_macro("SM_89")
#pragma pop_macro("SM_90")
+#pragma pop_macro("SM_90a")
#pragma pop_macro("PTX42")
#pragma pop_macro("PTX60")
#pragma pop_macro("PTX61")
@@ -1072,3 +1079,5 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78))
#pragma pop_macro("PTX78")
#pragma pop_macro("PTX80")
#pragma pop_macro("PTX81")
+#pragma pop_macro("PTX82")
+#pragma pop_macro("PTX83")
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 2d912bdbbd1bc..916cb4b7ef34a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -39,9 +39,11 @@ enum class CudaVersion {
CUDA_118,
CUDA_120,
CUDA_121,
- FULLY_SUPPORTED = CUDA_118,
+ CUDA_122,
+ CUDA_123,
+ FULLY_SUPPORTED = CUDA_123,
PARTIALLY_SUPPORTED =
- CUDA_121, // Partially supported. Proceed with a warning.
+ CUDA_123, // Partially supported. Proceed with a warning.
NEW = 10000, // Too new. Issue a warning, but allow using it.
};
const char *CudaVersionToString(CudaVersion V);
@@ -71,6 +73,7 @@ enum class CudaArch {
SM_87,
SM_89,
SM_90,
+ SM_90a,
GFX600,
GFX601,
GFX602,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 65840b9f20252..1b1da6a1356f2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -39,6 +39,8 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
CUDA_ENTRY(11, 8),
CUDA_ENTRY(12, 0),
CUDA_ENTRY(12, 1),
+ CUDA_ENTRY(12, 2),
+ CUDA_ENTRY(12, 3),
{"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
{"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
@@ -93,6 +95,7 @@ static const CudaArchToStringMap arch_names[] = {
SM(87), // Jetson/Drive AGX Orin
SM(89), // Ada Lovelace
SM(90), // Hopper
+ SM(90a), // Hopper
GFX(600), // gfx600
GFX(601), // gfx601
GFX(602), // gfx602
@@ -209,6 +212,8 @@ CudaVersion MinVersionForCudaArch(CudaArch A) {
case CudaArch::SM_89:
case CudaArch::SM_90:
return CudaVersion::CUDA_118;
+ case CudaArch::SM_90a:
+ return CudaVersion::CUDA_120;
default:
llvm_unreachable("invalid enum");
}
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 3a4a75b0348f2..5c601812f6175 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -262,11 +262,14 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::SM_89:
return "890";
case CudaArch::SM_90:
+ case CudaArch::SM_90a:
return "900";
}
llvm_unreachable("unhandled CudaArch");
}();
Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
+ if (GPU == CudaArch::SM_90a)
+ Builder.defineMacro("__CUDA_ARCH_FEAT_SM90_ALL", "1");
}
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 293ccaa3413cd..299ee1460b3db 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3483,6 +3483,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::SM_87:
case CudaArch::SM_89:
case CudaArch::SM_90:
+ case CudaArch::SM_90a:
case CudaArch::GFX600:
case CudaArch::GFX601:
case CudaArch::GFX602:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index e95ff98e6c940..ef1e77974c1ea 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -78,6 +78,10 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
return CudaVersion::CUDA_120;
if (raw_version < 12020)
return CudaVersion::CUDA_121;
+ if (raw_version < 12030)
+ return CudaVersion::CUDA_122;
+ if (raw_version < 12040)
+ return CudaVersion::CUDA_123;
return CudaVersion::NEW;
}
@@ -671,6 +675,8 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case CudaVersion::CUDA_##CUDA_VER: \
PtxFeature = "+ptx" #PTX_VER; \
break;
+ CASE_CUDA_VERSION(123, 83);
+ CASE_CUDA_VERSION(122, 82);
CASE_CUDA_VERSION(121, 81);
CASE_CUDA_VERSION(120, 80);
CASE_CUDA_VERSION(118, 78);
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index c7146e63add5f..5475b1d8bd052 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 02fa2a4ee81ec..f2a4ce381b40b 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -24,23 +24,24 @@ include "NVPTXInstrInfo.td"
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//
-class FeatureSM<int version>:
- SubtargetFeature<"sm_"# version, "SmVersion",
- "" # version,
- "Target SM " # version>;
-def SM90a: FeatureSM<90>;
+class FeatureSM<string sm, int value>:
+ SubtargetFeature<"sm_"# sm, "FullSmVersion",
+ "" # value,
+ "Target SM " # sm>;
class FeaturePTX<int version>:
SubtargetFeature<"ptx"# version, "PTXVersion",
"" # version,
"Use PTX version " # version>;
-foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53,
- 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
- def SM#version: FeatureSM<version>;
+foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
+ 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
+ def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
+
+def SM90a: FeatureSM<"90a", 901>;
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65,
- 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in
+ 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83] in
def PTX#version: FeaturePTX<version>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7fa64af196b93..420065585b384 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -36,6 +36,11 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
ParseSubtargetFeatures(TargetName, /*TuneCPU*/ TargetName, FS);
+ // Re-map SM version numbers, SmVersion carries the regular SMs which do
+ // have relative order, while FullSmVersion allows distinguishing sm_90 from
+ // sm_90a, which would *not* be a subset of sm_91.
+ SmVersion = getSmVersion();
+
// Set default to PTX 6.0 (CUDA 9.0)
if (PTXVersion == 0) {
PTXVersion = 60;
@@ -48,7 +53,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const NVPTXTargetMachine &TM)
: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
- SmVersion(20), TM(TM),
+ FullSmVersion(200), SmVersion(getSmVersion()), TM(TM),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
bool NVPTXSubtarget::hasImageHandles() const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 93af11c258b48..951962d1e68be 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -35,7 +35,12 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned PTXVersion;
- // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
+ // Full SM version x.y is represented as 100*x+10*y+feature, e.g. 3.1 == 310
+ // sm_90a == 901
+ unsigned int FullSmVersion;
+
+ // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31. Derived from
+ // FullSmVersion.
unsigned int SmVersion;
const NVPTXTargetMachine &TM;
@@ -80,8 +85,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
bool allowFP16Math() const;
bool hasMaskOperator() const { return PTXVersion >= 71; }
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
- unsigned int getSmVersion() const { return SmVersion; }
+ unsigned int getSmVersion() const { return FullSmVersion / 10; }
+ unsigned int getFullSmVersion() const { return FullSmVersion; }
std::string getTargetName() const { return TargetName; }
+ bool isSm90a() const { return getFullSmVersion() == 901; }
// Get maximum value of required alignments among the supported data types.
// From the PTX ISA doc, section 8.2.3:
|
@ezhulenev FYI. |
jhuber6
reviewed
Dec 8, 2023
|
jhuber6
approved these changes
Dec 9, 2023
Tested the changes with cuda test-suite, with cuda-12.1 and 12.3 targeting |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
clang:codegen
clang:driver
'clang' and 'clang++' user-facing binaries. Not 'clang-cl'
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang:openmp
OpenMP related changes to Clang
clang
Clang issues not falling into any other category
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.