Skip to content

Commit

Permalink
Merge pull request #7510 from BradleyWood/xcro_flags
Browse files Browse the repository at this point in the history
x86: Add disableAVX2/512 options and check XCR0 for OS support
  • Loading branch information
0xdaryl authored Dec 11, 2024
2 parents ad265f2 + aac0321 commit 3b178a4
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 27 deletions.
35 changes: 35 additions & 0 deletions compiler/control/OMROptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ TR::OptionTable OMR::Options::_jitOptions[] = {
{"disableAsyncCheckVersioning", "O\tdisable versioning of loops wrt async checks", SET_OPTION_BIT(TR_DisableAsyncCheckVersioning), "F"},
{"disableAsyncCompilation", "M\tdisable asynchronous compilation", SET_OPTION_BIT(TR_DisableAsyncCompilation), "F"},
{"disableAutoSIMD", "M\tdisable automatic vectorization of loops", SET_OPTION_BIT(TR_DisableAutoSIMD), "F"},
{"disableAVX", "C\tdisable avx and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX, 0, "F"},
{"disableAVX2", "C\tdisable avx2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX2, 0, "F"},
{"disableAVX512", "C\tdisable avx512 on x86", TR::Options::disableCPUFeatures, TR_DisableAVX512, 0, "F"},
{"disableBasicBlockExtension", "O\tdisable basic block extension", TR::Options::disableOptimization, basicBlockExtension, 0, "P"},
{"disableBasicBlockPeepHole", "O\tdisable basic blocks peepHole", SET_OPTION_BIT(TR_DisableBasicBlockPeepHole), "F"},
{"disableBCDArithChildOrdering", "O\tstress testing option -- do not reorder children of BCD arithmetic nodes", SET_OPTION_BIT(TR_DisableBCDArithChildOrdering), "F" },
Expand Down Expand Up @@ -554,6 +557,9 @@ TR::OptionTable OMR::Options::_jitOptions[] = {
{"disableSIMDUTF16BEEncoder", "M\tdisable inlining of SIMD UTF16 Big Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16BEEncoder), "F"},
{"disableSIMDUTF16LEEncoder", "M\tdisable inlining of SIMD UTF16 Little Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16LEEncoder), "F"},
{"disableSmartPlacementOfCodeCaches", "O\tdisable placement of code caches in memory so they are near each other and the DLLs", SET_OPTION_BIT(TR_DisableSmartPlacementOfCodeCaches), "F", NOT_IN_SUBSET},
{"disableSSE3", "C\tdisable sse 3 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE3, 0, "F"},
{"disableSSE4_1", "C\tdisable sse 4.1 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_1, 0, "F"},
{"disableSSE4_2", "C\tdisable sse 4.2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_2, 0, "F"},
{"disableStableAnnotations", "M\tdisable recognition of @Stable", SET_OPTION_BIT(TR_DisableStableAnnotations), "F"},
{"disableStaticFinalFieldFolding", "O\tdisable generic static final field folding", TR::Options::disableOptimization, staticFinalFieldFolding, 0, "P"},
{"disableStoreOnCondition", "O\tdisable store on condition (STOC) code gen", SET_OPTION_BIT(TR_DisableStoreOnCondition), "F"},
Expand Down Expand Up @@ -4931,6 +4937,35 @@ OMR::Options::configureOptReporting(const char *option, void *base, TR::OptionTa
return option;
}

const char *
OMR::Options::disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry)
{
TR::Options *options = (TR::Options*)base;
TR_CompilationOptions co = (TR_CompilationOptions)entry->parm1;
options->setOption(co);

/* When disabling SIMD, disable newer features too */

switch (co)
{
case TR_DisableSSE3:
options->setOption(TR_DisableSSE3);
case TR_DisableSSE4_1:
options->setOption(TR_DisableSSE4_1);
case TR_DisableSSE4_2:
options->setOption(TR_DisableSSE4_2);
case TR_DisableAVX:
options->setOption(TR_DisableAVX);
case TR_DisableAVX2:
options->setOption(TR_DisableAVX2);
case TR_DisableAVX512:
options->setOption(TR_DisableAVX512);
default:
break;
}

return option;
}

const char *OMR::Options::_verboseOptionNames[TR_NumVerboseOptions] =
{
Expand Down
13 changes: 7 additions & 6 deletions compiler/control/OMROptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,13 +400,13 @@ enum TR_CompilationOptions
TR_EnableVectorAPIBoxing = 0x00010000 + 10,
TR_EnableSequentialLoadStoreWarm = 0x00020000 + 10,
TR_EnableSequentialLoadStoreCold = 0x00040000 + 10,
// Available = 0x00080000 + 10,
// Available = 0x00100000 + 10,
// Available = 0x00200000 + 10,
TR_DisableAVX = 0x00080000 + 10,
TR_DisableAVX2 = 0x00100000 + 10,
TR_DisableAVX512 = 0x00200000 + 10,
TR_ConservativeCompilation = 0x00400000 + 10,
// Available = 0x00800000 + 10,
// Available = 0x01000000 + 10,
// Available = 0x02000000 + 10,
TR_DisableSSE3 = 0x00800000 + 10,
TR_DisableSSE4_1 = 0x01000000 + 10,
TR_DisableSSE4_2 = 0x02000000 + 10,
TR_DisableNewX86VolatileSupport = 0x04000000 + 10,
// Available = 0x08000000 + 10,
// Available = 0x10000000 + 10,
Expand Down Expand Up @@ -2291,6 +2291,7 @@ class OMR_EXTENSIBLE Options
static const char *clearBitsFromStringSet(const char *option, void *base, TR::OptionTable *entry);

static const char *configureOptReporting(const char *option, void *base, TR::OptionTable *entry);
static const char *disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry);

// Option processing helper functions
//
Expand Down
3 changes: 0 additions & 3 deletions compiler/x/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,6 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
static bool disableX86TRTO = feGetEnv("TR_disableX86TRTO") != NULL;
if (!disableX86TRTO)
{
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n");
if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1))
{
self()->setSupportsArrayTranslateTRTO();
Expand All @@ -433,8 +432,6 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp)
static bool disableX86TROT = feGetEnv("TR_disableX86TROT") != NULL;
if (!disableX86TROT)
{
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n");
TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE2) == self()->getX86ProcessorInfo().supportsSSE2(), "supportsSSE4_1() failed\n");
if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1))
{
self()->setSupportsArrayTranslateTROT();
Expand Down
2 changes: 0 additions & 2 deletions compiler/x/codegen/OMRInstOpCode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,6 @@ template <typename TBuffer> typename TBuffer::cursor_t OMR::X86::InstOpCode::OpC
TR::Instruction::REX rex(rexbits);
rex.W = rex_w;

TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n");

if (enc != VEX_L___)
{
if (enc >> 2 && enc != VEX_LZ)
Expand Down
1 change: 0 additions & 1 deletion compiler/x/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5818,7 +5818,6 @@ TR::Register* OMR::X86::TreeEvaluator::floatingPointBinaryArithmeticEvaluator(TR
TR::Node* operandNode0 = node->getChild(0);
TR::Node* operandNode1 = node->getChild(1);

TR_ASSERT_FATAL(cg->comp()->compileRelocatableCode() || cg->comp()->isOutOfProcessCompilation() || cg->comp()->compilePortableCode() || cg->comp()->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n");
bool useRegMemForm = cg->comp()->target().cpu.supportsAVX();

if (useRegMemForm)
Expand Down
2 changes: 0 additions & 2 deletions compiler/x/codegen/X86BinaryEncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1547,8 +1547,6 @@ TR::X86RegInstruction::enlarge(int32_t requestedEnlargementSize, int32_t maxEnla
if (disableRexExpansion || cg()->comp()->getOption(TR_DisableZealousCodegenOpts))
return OMR::X86::EnlargementResult(0, 0);

TR_ASSERT_FATAL(cg()->comp()->compileRelocatableCode() || cg()->comp()->isOutOfProcessCompilation() || cg()->comp()->compilePortableCode() || cg()->comp()->target().cpu.supportsAVX() == cg()->getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n");

if (getOpCode().info().supportsAVX() && cg()->comp()->target().cpu.supportsAVX())
return OMR::X86::EnlargementResult(0, 0); // REX expansion isn't allowed for AVX instructions

Expand Down
83 changes: 77 additions & 6 deletions compiler/x/env/OMRCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,39 @@ OMR::X86::CPU::detect(OMRPortLibrary * const omrPortLib)
processorDescription.features[i] &= featureMasks.features[i];
}

bool disableAVX = true;
bool disableAVX512 = true;

// Check XCR0 register for OS support of xmm/ymm/zmm
if (TRUE == omrsysinfo_processor_has_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE))
{
static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL;
if (((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
{
// Unset OSXSAVE if not enabled via CR0
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE, FALSE);
}
// '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
disableAVX = ((6 & _xgetbv(0)) != 6);
// 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM))
disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6);
}

if(disableAVX)
{
// Unset AVX/AVX2 if not enabled via XCR0 or otherwise disabled
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX2, FALSE);
}

if (disableAVX512)
{
// Unset AVX-512 if not enabled via XCR0 or otherwise disabled
// If other AVX-512 extensions are supported in the port library, they need to be disabled here
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512F, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512VL, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512BW, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512CD, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512DQ, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_BITALG, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI2, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VNNI, FALSE);
omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VPOPCNTDQ, FALSE);
}

return TR::CPU(processorDescription);
Expand Down Expand Up @@ -254,9 +279,55 @@ OMR::X86::CPU::is(OMRProcessorArchitecture p)
return _processorDescription.processor == p;
}

/**
 * Reports whether the given processor feature has been turned off through one
 * of the TR_Disable* compilation options.
 *
 * The feature is first mapped to its disabling option: SSE3, SSE4.1, SSE4.2,
 * AVX and AVX2 each have their own option, and every AVX-512 sub-feature
 * listed below shares TR_DisableAVX512. Features with no mapping are never
 * reported as disabled.
 *
 * @param feature  an OMR_FEATURE_X86_* value
 * @return true only when the feature maps to an option, `compilation` is
 *         non-null, and that option is set on it; false otherwise
 */
bool
OMR::X86::CPU::is_feature_disabled(uint32_t feature)
   {
   TR_CompilationOptions disablingOption;

   switch (feature)
      {
      case OMR_FEATURE_X86_SSE3:
         disablingOption = TR_DisableSSE3;
         break;

      case OMR_FEATURE_X86_SSE4_1:
         disablingOption = TR_DisableSSE4_1;
         break;

      case OMR_FEATURE_X86_SSE4_2:
         disablingOption = TR_DisableSSE4_2;
         break;

      case OMR_FEATURE_X86_AVX:
         disablingOption = TR_DisableAVX;
         break;

      case OMR_FEATURE_X86_AVX2:
         disablingOption = TR_DisableAVX2;
         break;

      // All AVX-512 sub-features are controlled by the single AVX-512 option.
      case OMR_FEATURE_X86_AVX512F:
      case OMR_FEATURE_X86_AVX512VL:
      case OMR_FEATURE_X86_AVX512BW:
      case OMR_FEATURE_X86_AVX512CD:
      case OMR_FEATURE_X86_AVX512DQ:
      case OMR_FEATURE_X86_AVX512ER:
      case OMR_FEATURE_X86_AVX512PF:
      case OMR_FEATURE_X86_AVX512_BITALG:
      case OMR_FEATURE_X86_AVX512_IFMA:
      case OMR_FEATURE_X86_AVX512_VBMI:
      case OMR_FEATURE_X86_AVX512_VBMI2:
      case OMR_FEATURE_X86_AVX512_VNNI:
      case OMR_FEATURE_X86_AVX512_VPOPCNTDQ:
         disablingOption = TR_DisableAVX512;
         break;

      // No option governs this feature, so it cannot be disabled here.
      default:
         return false;
      }

   // Without a compilation object there are no options to consult.
   return compilation && compilation->getOption(disablingOption);
   }

bool
OMR::X86::CPU::supportsFeature(uint32_t feature)
{
if (is_feature_disabled(feature))
return false;

if (TR::Compiler->omrPortLib == NULL)
return self()->supports_feature_old_api(feature);

Expand Down
1 change: 1 addition & 0 deletions compiler/x/env/OMRCPU.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU
bool supportsFeature(uint32_t feature);
bool supports_feature_old_api(uint32_t feature);
bool supports_feature_test(uint32_t feature);
bool is_feature_disabled(uint32_t feature);

/**
* @brief Returns name of the current processor
Expand Down
38 changes: 31 additions & 7 deletions compiler/x/runtime/X86Runtime.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,39 @@ inline bool jitGetCPUID(TR_X86CPUIDBuffer* pBuffer)
pBuffer->_featureFlags8 = CPUInfo[EBX];
pBuffer->_featureFlags10 = CPUInfo[ECX];

// Check for XSAVE
bool disableAVX = true;
bool disableAVX512 = true;

// Check XCR0 register for OS support of xmm/ymm/zmm
if(pBuffer->_featureFlags2 & TR_OSXSAVE)
{
static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL;
if(((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
{
// Unset OSXSAVE if not enabled via CR0
pBuffer->_featureFlags2 &= ~TR_OSXSAVE;
}
// '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled)
disableAVX = ((6 & _xgetbv(0)) != 6);
// 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM))
disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6);
}

if(disableAVX)
{
// Unset AVX/AVX2 if not enabled via XCR0 or otherwise disabled
pBuffer->_featureFlags2 &= ~TR_AVX;
pBuffer->_featureFlags8 &= ~TR_AVX2;
}

if (disableAVX512)
{
// Unset AVX-512 if not enabled via XCR0 or otherwise disabled
// If other AVX-512 extensions are supported in the old cpuid API, they need to be disabled here
pBuffer->_featureFlags8 &= ~TR_AVX512F;
pBuffer->_featureFlags8 &= ~TR_AVX512VL;
pBuffer->_featureFlags8 &= ~TR_AVX512BW;
pBuffer->_featureFlags8 &= ~TR_AVX512CD;
pBuffer->_featureFlags8 &= ~TR_AVX512DQ;
pBuffer->_featureFlags10 &= ~TR_AVX512_BITALG;
pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI;
pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI2;
pBuffer->_featureFlags10 &= ~TR_AVX512_VNNI;
pBuffer->_featureFlags10 &= ~TR_AVX512_VPOPCNTDQ;
}

/* Mask out the bits the compiler does not care about.
Expand Down

0 comments on commit 3b178a4

Please sign in to comment.