From 4b10654ef953da715025499044dc798c5e406722 Mon Sep 17 00:00:00 2001 From: Daryl Maier Date: Thu, 12 Dec 2024 08:06:16 -0500 Subject: [PATCH] Revert "x86: Add disableAVX2/512 options and check XCR0 for OS support" --- compiler/control/OMROptions.cpp | 35 ---------- compiler/control/OMROptions.hpp | 13 ++-- compiler/x/codegen/OMRCodeGenerator.cpp | 3 + compiler/x/codegen/OMRInstOpCode.cpp | 2 + compiler/x/codegen/OMRTreeEvaluator.cpp | 1 + compiler/x/codegen/X86BinaryEncoding.cpp | 2 + compiler/x/env/OMRCPU.cpp | 83 ++---------------------- compiler/x/env/OMRCPU.hpp | 1 - compiler/x/runtime/X86Runtime.hpp | 38 ++--------- 9 files changed, 27 insertions(+), 151 deletions(-) diff --git a/compiler/control/OMROptions.cpp b/compiler/control/OMROptions.cpp index a4a6bcf7cab..b1e99731814 100644 --- a/compiler/control/OMROptions.cpp +++ b/compiler/control/OMROptions.cpp @@ -269,9 +269,6 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"disableAsyncCheckVersioning", "O\tdisable versioning of loops wrt async checks", SET_OPTION_BIT(TR_DisableAsyncCheckVersioning), "F"}, {"disableAsyncCompilation", "M\tdisable asynchronous compilation", SET_OPTION_BIT(TR_DisableAsyncCompilation), "F"}, {"disableAutoSIMD", "M\tdisable automatic vectorization of loops", SET_OPTION_BIT(TR_DisableAutoSIMD), "F"}, - {"disableAVX", "C\tdisable avx and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX, 0, "F"}, - {"disableAVX2", "C\tdisable avx2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableAVX2, 0, "F"}, - {"disableAVX512", "C\tdisable avx512 on x86", TR::Options::disableCPUFeatures, TR_DisableAVX512, 0, "F"}, {"disableBasicBlockExtension", "O\tdisable basic block extension", TR::Options::disableOptimization, basicBlockExtension, 0, "P"}, {"disableBasicBlockPeepHole", "O\tdisable basic blocks peepHole", SET_OPTION_BIT(TR_DisableBasicBlockPeepHole), "F"}, {"disableBCDArithChildOrdering", "O\tstress testing option -- do not reorder children of BCD arithmetic nodes", SET_OPTION_BIT(TR_DisableBCDArithChildOrdering), "F" }, @@ -557,9 +554,6 @@ TR::OptionTable OMR::Options::_jitOptions[] = { {"disableSIMDUTF16BEEncoder", "M\tdisable inlining of SIMD UTF16 Big Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16BEEncoder), "F"}, {"disableSIMDUTF16LEEncoder", "M\tdisable inlining of SIMD UTF16 Little Endian encoder", SET_OPTION_BIT(TR_DisableSIMDUTF16LEEncoder), "F"}, {"disableSmartPlacementOfCodeCaches", "O\tdisable placement of code caches in memory so they are near each other and the DLLs", SET_OPTION_BIT(TR_DisableSmartPlacementOfCodeCaches), "F", NOT_IN_SUBSET}, - {"disableSSE3", "C\tdisable sse 3 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE3, 0, "F"}, - {"disableSSE4_1", "C\tdisable sse 4.1 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_1, 0, "F"}, - {"disableSSE4_2", "C\tdisable sse 4.2 and newer on x86", TR::Options::disableCPUFeatures, TR_DisableSSE4_2, 0, "F"}, {"disableStableAnnotations", "M\tdisable recognition of @Stable", SET_OPTION_BIT(TR_DisableStableAnnotations), "F"}, {"disableStaticFinalFieldFolding", "O\tdisable generic static final field folding", TR::Options::disableOptimization, staticFinalFieldFolding, 0, "P"}, {"disableStoreOnCondition", "O\tdisable store on condition (STOC) code gen", SET_OPTION_BIT(TR_DisableStoreOnCondition), "F"}, @@ -4937,35 +4931,6 @@ OMR::Options::configureOptReporting(const char *option, void *base, TR::OptionTa return option; } -const char * -OMR::Options::disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry) - { - TR::Options *options = (TR::Options*)base; - TR_CompilationOptions co = (TR_CompilationOptions)entry->parm1; - options->setOption(co); - - /* When disabling SIMD, disable newer features too */ - - switch (co) - { - case TR_DisableSSE3: - options->setOption(TR_DisableSSE3); - case TR_DisableSSE4_1: - options->setOption(TR_DisableSSE4_1); - case TR_DisableSSE4_2: - options->setOption(TR_DisableSSE4_2); - case TR_DisableAVX: - options->setOption(TR_DisableAVX); - case TR_DisableAVX2: - options->setOption(TR_DisableAVX2); - case TR_DisableAVX512: - options->setOption(TR_DisableAVX512); - default: - break; - } - - return option; - } const char *OMR::Options::_verboseOptionNames[TR_NumVerboseOptions] = { diff --git a/compiler/control/OMROptions.hpp b/compiler/control/OMROptions.hpp index 99f8dba85b0..42af02eabec 100644 --- a/compiler/control/OMROptions.hpp +++ b/compiler/control/OMROptions.hpp @@ -400,13 +400,13 @@ enum TR_CompilationOptions TR_EnableVectorAPIBoxing = 0x00010000 + 10, TR_EnableSequentialLoadStoreWarm = 0x00020000 + 10, TR_EnableSequentialLoadStoreCold = 0x00040000 + 10, - TR_DisableAVX = 0x00080000 + 10, - TR_DisableAVX2 = 0x00100000 + 10, - TR_DisableAVX512 = 0x00200000 + 10, + // Available = 0x00080000 + 10, + // Available = 0x00100000 + 10, + // Available = 0x00200000 + 10, TR_ConservativeCompilation = 0x00400000 + 10, - TR_DisableSSE3 = 0x00800000 + 10, - TR_DisableSSE4_1 = 0x01000000 + 10, - TR_DisableSSE4_2 = 0x02000000 + 10, + // Available = 0x00800000 + 10, + // Available = 0x01000000 + 10, + // Available = 0x02000000 + 10, TR_DisableNewX86VolatileSupport = 0x04000000 + 10, // Available = 0x08000000 + 10, // Available = 0x10000000 + 10, @@ -2291,7 +2291,6 @@ class OMR_EXTENSIBLE Options static const char *clearBitsFromStringSet(const char *option, void *base, TR::OptionTable *entry); static const char *configureOptReporting(const char *option, void *base, TR::OptionTable *entry); - static const char *disableCPUFeatures(const char *option, void *base, TR::OptionTable *entry); // Option processing helper functions // diff --git a/compiler/x/codegen/OMRCodeGenerator.cpp b/compiler/x/codegen/OMRCodeGenerator.cpp index d8367502983..9ea35594895 100644 --- a/compiler/x/codegen/OMRCodeGenerator.cpp +++ b/compiler/x/codegen/OMRCodeGenerator.cpp @@ -424,6 +424,7 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp) static bool disableX86TRTO = feGetEnv("TR_disableX86TRTO") != NULL; if (!disableX86TRTO) { + TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n"); if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1)) { self()->setSupportsArrayTranslateTRTO(); @@ -432,6 +433,8 @@ OMR::X86::CodeGenerator::initializeX86(TR::Compilation *comp) static bool disableX86TROT = feGetEnv("TR_disableX86TROT") != NULL; if (!disableX86TROT) { + TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1) == self()->getX86ProcessorInfo().supportsSSE4_1(), "supportsSSE4_1() failed\n"); + TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE2) == self()->getX86ProcessorInfo().supportsSSE2(), "supportsSSE4_1() failed\n"); if (comp->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE4_1)) { self()->setSupportsArrayTranslateTROT(); diff --git a/compiler/x/codegen/OMRInstOpCode.cpp b/compiler/x/codegen/OMRInstOpCode.cpp index 088e9e79158..925712630f0 100644 --- a/compiler/x/codegen/OMRInstOpCode.cpp +++ b/compiler/x/codegen/OMRInstOpCode.cpp @@ -103,6 +103,8 @@ template typename TBuffer::cursor_t OMR::X86::InstOpCode::OpC TR::Instruction::REX rex(rexbits); rex.W = rex_w; + TR_ASSERT_FATAL(comp->compileRelocatableCode() || comp->isOutOfProcessCompilation() || comp->compilePortableCode() || comp->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n"); + if (enc != VEX_L___) { if (enc >> 2 && enc != VEX_LZ) diff --git a/compiler/x/codegen/OMRTreeEvaluator.cpp b/compiler/x/codegen/OMRTreeEvaluator.cpp index dc93d962373..3de371978d9 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/codegen/OMRTreeEvaluator.cpp @@ -5818,6 +5818,7 @@ TR::Register* OMR::X86::TreeEvaluator::floatingPointBinaryArithmeticEvaluator(TR TR::Node* operandNode0 = node->getChild(0); TR::Node* operandNode1 = node->getChild(1); + TR_ASSERT_FATAL(cg->comp()->compileRelocatableCode() || cg->comp()->isOutOfProcessCompilation() || cg->comp()->compilePortableCode() || cg->comp()->target().cpu.supportsAVX() == TR::CodeGenerator::getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n"); bool useRegMemForm = cg->comp()->target().cpu.supportsAVX(); if (useRegMemForm) diff --git a/compiler/x/codegen/X86BinaryEncoding.cpp b/compiler/x/codegen/X86BinaryEncoding.cpp index db2c51bb98c..3b2798b9bce 100644 --- a/compiler/x/codegen/X86BinaryEncoding.cpp +++ b/compiler/x/codegen/X86BinaryEncoding.cpp @@ -1547,6 +1547,8 @@ TR::X86RegInstruction::enlarge(int32_t requestedEnlargementSize, int32_t maxEnla if (disableRexExpansion || cg()->comp()->getOption(TR_DisableZealousCodegenOpts)) return OMR::X86::EnlargementResult(0, 0); + TR_ASSERT_FATAL(cg()->comp()->compileRelocatableCode() || cg()->comp()->isOutOfProcessCompilation() || cg()->comp()->compilePortableCode() || cg()->comp()->target().cpu.supportsAVX() == cg()->getX86ProcessorInfo().supportsAVX(), "supportsAVX() failed\n"); + if (getOpCode().info().supportsAVX() && cg()->comp()->target().cpu.supportsAVX()) return OMR::X86::EnlargementResult(0, 0); // REX expansion isn't allowed for AVX instructions diff --git a/compiler/x/env/OMRCPU.cpp b/compiler/x/env/OMRCPU.cpp index 4ebbb181e24..7c203c46fe1 100644 --- a/compiler/x/env/OMRCPU.cpp +++ b/compiler/x/env/OMRCPU.cpp @@ -61,39 +61,14 @@ OMR::X86::CPU::detect(OMRPortLibrary * const omrPortLib) processorDescription.features[i] &= featureMasks.features[i]; } - bool disableAVX = true; - bool disableAVX512 = true; - - // Check XCRO register for OS support of xmm/ymm/zmm if (TRUE == omrsysinfo_processor_has_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE)) { - // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) - disableAVX = ((6 & _xgetbv(0)) != 6); - // 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM)) - disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6); - } - - if(disableAVX) - { - // Unset AVX/AVX2 if not enabled via CR0 or otherwise disabled - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX2, FALSE); - } - - if (disableAVX512) - { - // Unset AVX-512 if not enabled via CR0 or otherwise disabled - // If other AVX-512 extensions are supported in the port library, they need to be disabled here - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512F, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512VL, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512BW, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512CD, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512DQ, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_BITALG, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VBMI2, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VNNI, FALSE); - omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_AVX512_VPOPCNTDQ, FALSE); + static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL; + if (((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) + { + // Unset OSXSAVE if not enabled via CR0 + omrsysinfo_processor_set_feature(&processorDescription, OMR_FEATURE_X86_OSXSAVE, FALSE); + } } return TR::CPU(processorDescription); @@ -279,55 +254,9 @@ OMR::X86::CPU::is(OMRProcessorArchitecture p) return _processorDescription.processor == p; } -bool -OMR::X86::CPU::is_feature_disabled(uint32_t feature) - { - TR_CompilationOptions option = (TR_CompilationOptions) 0; - - switch (feature) - { - case OMR_FEATURE_X86_SSE3: - option = TR_DisableSSE3; - break; - case OMR_FEATURE_X86_SSE4_1: - option = TR_DisableSSE4_1; - break; - case OMR_FEATURE_X86_SSE4_2: - option = TR_DisableSSE4_2; - break; - case OMR_FEATURE_X86_AVX: - option = TR_DisableAVX; - break; - case OMR_FEATURE_X86_AVX2: - option = TR_DisableAVX2; - break; - case OMR_FEATURE_X86_AVX512F: - case OMR_FEATURE_X86_AVX512VL: - case OMR_FEATURE_X86_AVX512BW: - case OMR_FEATURE_X86_AVX512CD: - case OMR_FEATURE_X86_AVX512DQ: - case OMR_FEATURE_X86_AVX512ER: - case OMR_FEATURE_X86_AVX512PF: - case OMR_FEATURE_X86_AVX512_BITALG: - case OMR_FEATURE_X86_AVX512_IFMA: - case OMR_FEATURE_X86_AVX512_VBMI: - case OMR_FEATURE_X86_AVX512_VBMI2: - case OMR_FEATURE_X86_AVX512_VNNI: - case OMR_FEATURE_X86_AVX512_VPOPCNTDQ: - option = TR_DisableAVX512; - default: - break; - } - - return option && compilation && compilation->getOption(option); - } - bool OMR::X86::CPU::supportsFeature(uint32_t feature) { - if (is_feature_disabled(feature)) - return false; - if (TR::Compiler->omrPortLib == NULL) return self()->supports_feature_old_api(feature); diff --git a/compiler/x/env/OMRCPU.hpp b/compiler/x/env/OMRCPU.hpp index ac9291c36aa..d2b842ce3c9 100644 --- a/compiler/x/env/OMRCPU.hpp +++ b/compiler/x/env/OMRCPU.hpp @@ -146,7 +146,6 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU bool supportsFeature(uint32_t feature); bool supports_feature_old_api(uint32_t feature); bool supports_feature_test(uint32_t feature); - bool is_feature_disabled(uint32_t feature); /** * @brief Returns name of the current processor diff --git a/compiler/x/runtime/X86Runtime.hpp b/compiler/x/runtime/X86Runtime.hpp index 4205baaebcf..7a59d52bbb4 100644 --- a/compiler/x/runtime/X86Runtime.hpp +++ b/compiler/x/runtime/X86Runtime.hpp @@ -87,39 +87,15 @@ inline bool jitGetCPUID(TR_X86CPUIDBuffer* pBuffer) pBuffer->_featureFlags8 = CPUInfo[EBX]; pBuffer->_featureFlags10 = CPUInfo[ECX]; - bool disableAVX = true; - bool disableAVX512 = true; - - // Check XCRO register for OS support of xmm/ymm/zmm + // Check for XSAVE if(pBuffer->_featureFlags2 & TR_OSXSAVE) { - // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) - disableAVX = ((6 & _xgetbv(0)) != 6); - // 'e6' = (mask for XCR0[7:5]='111b' (Opmask, ZMM_Hi256, Hi16_ZMM) + XCR0[2:1]='11b' (XMM/YMM)) - disableAVX512 = ((0xe6 & _xgetbv(0)) != 0xe6); - } - - if(disableAVX) - { - // Unset AVX/AVX2 if not enabled via CR0 or otherwise disabled - pBuffer->_featureFlags2 &= ~TR_AVX; - pBuffer->_featureFlags8 &= ~TR_AVX2; - } - - if (disableAVX512) - { - // Unset AVX-512 if not enabled via CR0 or otherwise disabled - // If other AVX-512 extensions are supported in the old cpuid API, they need to be disabled here - pBuffer->_featureFlags8 &= ~TR_AVX512F; - pBuffer->_featureFlags8 &= ~TR_AVX512VL; - pBuffer->_featureFlags8 &= ~TR_AVX512BW; - pBuffer->_featureFlags8 &= ~TR_AVX512CD; - pBuffer->_featureFlags8 &= ~TR_AVX512DQ; - pBuffer->_featureFlags10 &= ~TR_AVX512_BITALG; - pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI; - pBuffer->_featureFlags10 &= ~TR_AVX512_VBMI2; - pBuffer->_featureFlags10 &= ~TR_AVX512_VNNI; - pBuffer->_featureFlags10 &= ~TR_AVX512_VPOPCNTDQ; + static const bool disableAVX = feGetEnv("TR_DisableAVX") != NULL; + if(((6 & _xgetbv(0)) != 6) || disableAVX) // '6' = mask for XCR0[2:1]='11b' (XMM state and YMM state are enabled) + { + // Unset OSXSAVE if not enabled via CR0 + pBuffer->_featureFlags2 &= ~TR_OSXSAVE; + } } /* Mask out the bits the compiler does not care about.