-
Notifications
You must be signed in to change notification settings - Fork 1k
/
Copy pathcpu_isa_traits.cpp
392 lines (346 loc) · 12.5 KB
/
cpu_isa_traits.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
/*******************************************************************************
* Copyright 2019-2024 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include <mutex>
#include <string>

#ifdef __linux__
#include <sys/syscall.h>
#include <unistd.h>
#endif

#include "common/utils.hpp"
#include "cpu/x64/cpu_isa_traits.hpp"
namespace dnnl {
namespace impl {
namespace cpu {
namespace x64 {
namespace {
// Computes the initial value of the "max CPU ISA" setting.
//
// When the library is built with DNNL_ENABLE_MAX_CPU_ISA, the string that
// getenv_string_user() returns for "MAX_CPU_ISA" is compared against the
// user_option_env name of every selectable ISA. If the macro is not defined,
// the string is empty, or no name matches, isa_all is returned.
cpu_isa_t init_max_cpu_isa() {
    // The build macro only decides whether the environment is consulted.
#ifdef DNNL_ENABLE_MAX_CPU_ISA
    static std::string isa_val = getenv_string_user("MAX_CPU_ISA");

    // Rewrite the alias spellings before the name lookup below.
    if (isa_val == "avx512_core_fp16")
        isa_val = "avx10_1_512";
    else if (isa_val == "avx512_core_amx")
        isa_val = "avx10_1_512_amx";
    else if (isa_val == "avx512_core_amx_fp16")
        isa_val = "avx10_1_512_amx_fp16";
#else
    static std::string isa_val;
#endif

    // Nothing requested: keep the unrestricted default.
    if (isa_val.empty()) return isa_all;

#define RETURN_IF_SELECTED(cpu_isa) \
    if (isa_val == cpu_isa_traits<cpu_isa>::user_option_env) return cpu_isa

    RETURN_IF_SELECTED(isa_all);
    RETURN_IF_SELECTED(sse41);
    RETURN_IF_SELECTED(avx);
    RETURN_IF_SELECTED(avx2);
    RETURN_IF_SELECTED(avx2_vnni);
    RETURN_IF_SELECTED(avx2_vnni_2);
    RETURN_IF_SELECTED(avx512_core);
    RETURN_IF_SELECTED(avx512_core_vnni);
    RETURN_IF_SELECTED(avx512_core_bf16);
    RETURN_IF_SELECTED(avx512_core_fp16);
    RETURN_IF_SELECTED(avx512_core_amx);
    RETURN_IF_SELECTED(avx512_core_amx_fp16);

#undef RETURN_IF_SELECTED

    // An unrecognized name is ignored and the default is used.
    return isa_all;
}
// Accessor for the single max-ISA setting object. The object is a
// function-local static, so it is constructed on the first call and seeded
// with the result of init_max_cpu_isa().
set_once_before_first_get_setting_t<cpu_isa_t> &max_cpu_isa() {
    using max_isa_setting_t = set_once_before_first_get_setting_t<cpu_isa_t>;
    static max_isa_setting_t instance(init_max_cpu_isa());
    return instance;
}
// Computes the initial value of the CPU ISA hints setting.
//
// When the library is built with DNNL_ENABLE_CPU_ISA_HINTS, the string that
// getenv_string_user() returns for "CPU_ISA_HINTS" is inspected; only the
// value "prefer_ymm" is recognized. Every other case (macro not defined,
// empty or unknown string) yields dnnl_cpu_isa_no_hints.
dnnl_cpu_isa_hints_t init_cpu_isa_hints() {
    // The build macro only decides whether the environment is consulted.
#ifdef DNNL_ENABLE_CPU_ISA_HINTS
    static std::string hints_val = getenv_string_user("CPU_ISA_HINTS");
#else
    static std::string hints_val;
#endif

    // An empty string can never equal the literal, so no separate emptiness
    // check is required.
    return hints_val == "prefer_ymm" ? dnnl_cpu_isa_prefer_ymm
                                     : dnnl_cpu_isa_no_hints;
}
// Accessor for the single ISA-hints setting object. The object is a
// function-local static, so it is constructed on the first call and seeded
// with the result of init_cpu_isa_hints().
set_once_before_first_get_setting_t<dnnl_cpu_isa_hints_t> &cpu_isa_hints() {
    using hints_setting_t
            = set_once_before_first_get_setting_t<dnnl_cpu_isa_hints_t>;
    static hints_setting_t instance(init_cpu_isa_hints());
    return instance;
}
} // namespace
// Wraps an internal cpu_isa_t value and provides its public-API enum
// counterpart and a human-readable description.
struct isa_info_t {
    // explicit: a bare cpu_isa_t should never silently turn into an
    // isa_info_t.
    explicit isa_info_t(cpu_isa_t aisa) : isa(aisa) {}

    // this converter is needed as code base defines certain ISAs
    // that the library does not expose (e.g. avx512_core_bf16_ymm),
    // so the internal and external enum types do not coincide.
    //
    // Returns dnnl_cpu_isa_default for any ISA without a dedicated mapping.
    dnnl_cpu_isa_t convert_to_public_enum() const {
        switch (isa) {
            case avx512_core_amx_fp16: return dnnl_cpu_isa_avx512_core_amx_fp16;
            case avx512_core_amx: return dnnl_cpu_isa_avx512_core_amx;
            case avx512_core_fp16: return dnnl_cpu_isa_avx512_core_fp16;
            case avx512_core_bf16_ymm: // fallback to avx512_core_bf16
            case avx512_core_bf16: return dnnl_cpu_isa_avx512_core_bf16;
            case avx512_core_vnni: return dnnl_cpu_isa_avx512_core_vnni;
            case avx512_core: return dnnl_cpu_isa_avx512_core;
            case avx2_vnni_2: return dnnl_cpu_isa_avx2_vnni_2;
            case avx2_vnni: return dnnl_cpu_isa_avx2_vnni;
            case avx2: return dnnl_cpu_isa_avx2;
            case avx: return dnnl_cpu_isa_avx;
            case sse41: return dnnl_cpu_isa_sse41;
            default: return dnnl_cpu_isa_default;
        }
    }

    // Returns a description of the wrapped ISA as a string literal (the
    // pointer stays valid for the lifetime of the program). ISAs without a
    // dedicated entry are reported as "Intel 64".
    const char *get_name() const {
        switch (isa) {
            case avx512_core_amx_fp16:
                return "Intel AVX-512 with float16, Intel DL Boost and "
                       "bfloat16 support and Intel AMX with bfloat16, float16 "
                       "and 8-bit integer support";
            case avx512_core_amx:
                return "Intel AVX-512 with float16, Intel DL Boost and "
                       "bfloat16 support and Intel AMX with bfloat16 and 8-bit "
                       "integer support";
            case avx512_core_fp16:
                // NOTE(review): the trailing space in this literal is kept
                // as-is so the emitted text does not change.
                return "Intel AVX-512 with float16, Intel DL Boost and "
                       "bfloat16 support ";
            case avx512_core_bf16_ymm:
                return "Intel AVX-512 with Intel DL Boost and bfloat16 support "
                       "on Ymm/Zmm";
            case avx512_core_bf16:
                return "Intel AVX-512 with Intel DL Boost and bfloat16 support";
            case avx512_core_vnni: return "Intel AVX-512 with Intel DL Boost";
            case avx512_core:
                return "Intel AVX-512 with AVX512BW, AVX512VL, and AVX512DQ "
                       "extensions";
            case avx2_vnni_2:
                return "Intel AVX2 with Intel DL Boost, float16 and bfloat16 "
                       "support";
            case avx2_vnni: return "Intel AVX2 with Intel DL Boost";
            case avx2: return "Intel AVX2";
            case avx: return "Intel AVX";
            case sse41: return "Intel SSE4.1";
            default: return "Intel 64";
        }
    }

    // The wrapped internal ISA value.
    cpu_isa_t isa;
};
// Returns the textual name of `isa` as produced by JIT_IMPL_NAME_HELPER with
// an empty prefix and an empty suffix.
std::string isa2str(cpu_isa_t isa) {
    return std::string(JIT_IMPL_NAME_HELPER("", isa, ""));
}
// Returns an isa_info_t for the first ISA in the probe list below that
// mayiuse() accepts, or one wrapping isa_undef when none is accepted.
static isa_info_t get_isa_info_t(void) {
    // Descending order is required: the first entry accepted by mayiuse()
    // wins, so more capable ISAs have to be probed before the ones they
    // extend.
    static const cpu_isa_t probe_order[] = {
            avx512_core_amx_fp16,
            avx512_core_amx,
            avx512_core_fp16,
            avx512_core_bf16_ymm,
            avx512_core_bf16,
            avx512_core_vnni,
            avx512_core,
            avx2_vnni_2,
            avx2_vnni,
            avx2,
            avx,
            sse41,
    };

    for (const cpu_isa_t candidate : probe_order) {
        if (mayiuse(candidate)) return isa_info_t(candidate);
    }
    return isa_info_t(isa_undef);
}
const char *get_isa_info() {
return get_isa_info_t().get_name();
}
// Returns the internal ISA value selected by get_isa_info_t().
cpu_isa_t get_max_cpu_isa() {
    const isa_info_t detected = get_isa_info_t();
    return detected.isa;
}
// Reads the max-ISA setting, forwarding `soft` to the setting's get().
cpu_isa_t get_max_cpu_isa_mask(bool soft) {
    auto &max_isa_setting = max_cpu_isa();
    return max_isa_setting.get(soft);
}
// Reads the ISA-hints setting, forwarding `soft` to the setting's get().
dnnl_cpu_isa_hints_t get_cpu_isa_hints(bool soft) {
    MAYBE_UNUSED(soft);
    auto &hints_setting = cpu_isa_hints();
    return hints_setting.get(soft);
}
// Translates the public enum value `isa` into the internal cpu_isa_t and
// stores it in the max-ISA setting.
//
// Returns success when the setting accepted the value, and invalid_arguments
// when `isa` is not one of the values listed below or when the setting
// rejected the update.
status_t set_max_cpu_isa(dnnl_cpu_isa_t isa) {
    using namespace dnnl::impl::status;
    using namespace dnnl::impl;
    using namespace dnnl::impl::cpu;

    cpu_isa_t requested_isa = isa_undef;

    // Each case label is the public value associated with the internal ISA
    // through cpu_isa_traits<>::user_option_val.
#define MAP_PUBLIC_VALUE(cpu_isa) \
    case cpu_isa_traits<cpu_isa>::user_option_val: \
        requested_isa = cpu_isa; \
        break

    switch (isa) {
        MAP_PUBLIC_VALUE(isa_all);
        MAP_PUBLIC_VALUE(sse41);
        MAP_PUBLIC_VALUE(avx);
        MAP_PUBLIC_VALUE(avx2);
        MAP_PUBLIC_VALUE(avx2_vnni);
        MAP_PUBLIC_VALUE(avx2_vnni_2);
        MAP_PUBLIC_VALUE(avx512_core);
        MAP_PUBLIC_VALUE(avx512_core_vnni);
        MAP_PUBLIC_VALUE(avx512_core_bf16);
        MAP_PUBLIC_VALUE(avx512_core_amx);
        MAP_PUBLIC_VALUE(avx512_core_fp16);
        MAP_PUBLIC_VALUE(avx512_core_amx_fp16);
        default: return invalid_arguments;
    }

#undef MAP_PUBLIC_VALUE

    // Every non-default case assigns a real ISA.
    assert(requested_isa != isa_undef);

    return max_cpu_isa().set(requested_isa) ? success : invalid_arguments;
}
// Returns the public-API enum value of the ISA selected by get_isa_info_t().
dnnl_cpu_isa_t get_effective_cpu_isa() {
    const isa_info_t detected = get_isa_info_t();
    return detected.convert_to_public_enum();
}
// Stores `isa_hints` in the ISA-hints setting.
//
// Returns success when the setting accepted the value and runtime_error when
// it rejected the update.
status_t set_cpu_isa_hints(dnnl_cpu_isa_hints_t isa_hints) {
    using namespace dnnl::impl::status;
    using namespace dnnl::impl;
    using namespace dnnl::impl::cpu;

    return cpu_isa_hints().set(isa_hints) ? success : runtime_error;
}
namespace amx {
// Returns the value of EAX reported by CPUID leaf 0x1D, sub-leaf 0, or 0 when
// mayiuse(amx_tile) is false. The CPUID query runs only once; its result is
// cached in a function-local static.
int get_max_palette() {
    if (!mayiuse(amx_tile)) return 0;

    static const unsigned int cached_eax = []() {
        unsigned int regs[4] = {};
        Xbyak::util::Cpu::getCpuidEx(0x1D, 0, regs);
        return regs[0];
    }();
    return cached_eax;
}
// Returns the palette to use: the value of get_max_palette() capped at 1,
// the highest palette this code supports.
int get_target_palette() {
    constexpr int max_supported_palette = 1;
    const int reported_palette = get_max_palette();
    return nstl::min(max_supported_palette, reported_palette);
}
namespace {
enum class info_kind_t { max_tiles, max_column_bytes, max_rows };
std::vector<int> get_palettes_info(info_kind_t info_kind) {
std::vector<int> palettes_info;
for (int p = 1; p <= get_max_palette(); p++) {
unsigned int data[4] = {};
const unsigned int &EBX = data[1];
const unsigned int &ECX = data[2];
Xbyak::util::Cpu::getCpuidEx(0x1D, p, data);
switch (info_kind) {
case info_kind_t::max_tiles:
palettes_info.push_back(EBX >> 16);
break;
case info_kind_t::max_column_bytes:
palettes_info.push_back((EBX << 16) >> 16);
break;
case info_kind_t::max_rows:
palettes_info.push_back((ECX << 16) >> 16);
break;
default: assert(!"unknown info_kind"); break;
}
}
assert((int)palettes_info.size() == get_max_palette());
return palettes_info;
}
} // namespace
// Returns the max_tiles field of `palette`.
//
// Returns 0 when mayiuse(amx_tile) is false and -1 when `palette` is outside
// 1..get_max_palette(). The per-palette table is built on first use and
// cached in a function-local static.
int get_max_tiles(int palette) {
    if (!mayiuse(amx_tile)) return 0;

    const bool palette_in_range = palette > 0 && palette <= get_max_palette();
    if (!palette_in_range) return -1;

    static const std::vector<int> tiles_by_palette
            = get_palettes_info(info_kind_t::max_tiles);
    // Palettes are numbered from 1, the table from 0.
    return tiles_by_palette.at(palette - 1);
}
// Returns the max_column_bytes field of `palette`.
//
// Returns 0 when mayiuse(amx_tile) is false and -1 when `palette` is outside
// 1..get_max_palette(). The per-palette table is built on first use and
// cached in a function-local static.
int get_max_column_bytes(int palette) {
    if (!mayiuse(amx_tile)) return 0;

    const bool palette_in_range = palette > 0 && palette <= get_max_palette();
    if (!palette_in_range) return -1;

    static const std::vector<int> column_bytes_by_palette
            = get_palettes_info(info_kind_t::max_column_bytes);
    // Palettes are numbered from 1, the table from 0.
    return column_bytes_by_palette.at(palette - 1);
}
// Returns the max_rows field of `palette`.
//
// Returns 0 when mayiuse(amx_tile) is false and -1 when `palette` is outside
// 1..get_max_palette(). The per-palette table is built on first use and
// cached in a function-local static.
int get_max_rows(int palette) {
    if (!mayiuse(amx_tile)) return 0;

    const bool palette_in_range = palette > 0 && palette <= get_max_palette();
    if (!palette_in_range) return -1;

    static const std::vector<int> rows_by_palette
            = get_palettes_info(info_kind_t::max_rows);
    // Palettes are numbered from 1, the table from 0.
    return rows_by_palette.at(palette - 1);
}
namespace {
#ifdef __linux__
// Linux: checks whether this process holds the XTILEDATA permission and, if
// it does not, requests it through arch_prctl().
//
// Returns true when the XTILEDATA bit is set in the permission bitmask,
// either already on entry or after a successful request. Returns false when
// any of the arch_prctl() calls fails or the bit is still clear afterwards.
//
// <sys/syscall.h> and <unistd.h> are included at file scope; system headers
// must not be pulled in from inside a namespace.
bool init() {
    // XTILEDATA feature number and its bit in the permission bitmask.
    constexpr unsigned long xfeature_xtiledata = 18;
    constexpr unsigned long xfeature_mask_xtiledata = 1UL
            << xfeature_xtiledata;
    // arch_prctl() codes that query / request the permission.
    constexpr int arch_get_xcomp_perm = 0x1022;
    constexpr int arch_req_xcomp_perm = 0x1023;

    unsigned long bitmask = 0;
    if (syscall(SYS_arch_prctl, arch_get_xcomp_perm, &bitmask) != 0)
        return false;

    // Permission already granted, nothing to request.
    if (bitmask & xfeature_mask_xtiledata) return true;

    // XFEATURE_XTILEDATA setup is failed, TMUL usage is not allowed
    if (syscall(SYS_arch_prctl, arch_req_xcomp_perm, xfeature_xtiledata) != 0)
        return false;

    // Read the bitmask again to confirm that the request took effect.
    if (syscall(SYS_arch_prctl, arch_get_xcomp_perm, &bitmask) != 0)
        return false;

    // Bit set: XFEATURE_XTILEDATA set successfully, TMUL usage is allowed.
    // Bit clear: setup is failed, can't use TMUL.
    return (bitmask & xfeature_mask_xtiledata) != 0;
}
#elif defined(_WIN32)
// Windows: reports whether both AMX state components are enabled in XCR0.
bool init() {
    // XSAVE feature must be supported in order to check AMX state.
    const bool xsave_supported = cpu().has(Xbyak::util::Cpu::tOSXSAVE);
    if (!xsave_supported) return false;

    // AMX state is controlled by TILECFG and TILEDATA features defined in
    // XCR0[18:17].
    uint64_t xcr0_features = Xbyak::util::Cpu::getXfeature();
    return ((xcr0_features >> 17) & 3) == 3;
}
#else
// Other platforms: no check is implemented.
bool init() {
    // Disable AMX by default to avoid potential crashes.
    return false;
}
#endif
// Accessor for the single AMX-availability setting object. The object is a
// function-local static, so init() runs once, on the first call.
set_once_before_first_get_setting_t<bool> &amx_setting() {
    using amx_setting_t = set_once_before_first_get_setting_t<bool>;
    static amx_setting_t instance(init());
    return instance;
}
} // namespace
// Returns the current value of the AMX-availability setting.
bool is_available() {
    auto &availability = amx_setting();
    return availability.get();
}
} // namespace amx
} // namespace x64
} // namespace cpu
} // namespace impl
} // namespace dnnl