Skip to content

Commit

Permalink
Fix an issue with the last level cache values on Linux running on cer…
Browse files Browse the repository at this point in the history
…tain AMD Processors (#108492)
  • Loading branch information
mrsharm authored Oct 11, 2024
1 parent 3e268fb commit 4f69316
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 57 deletions.
3 changes: 2 additions & 1 deletion src/coreclr/gc/gcconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ class GCConfigStringHolder
STRING_CONFIG(GCPath, "GCPath", "System.GC.Path", "Specifies the path of the standalone GC implementation.") \
INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", NULL, 0, "Specifies the spin count unit used by the GC.") \
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 1, "Enable the GC to dynamically adapt to application sizes.") \
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS")
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS") \
BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.")

// This class is responsible for retrieving configuration information
// for how the GC should operate.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/gc/unix/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set(CMAKE_INCLUDE_CURRENT_DIR ON)
include_directories("../env")
include_directories("..")

include(configure.cmake)

Expand Down
147 changes: 91 additions & 56 deletions src/coreclr/gc/unix/gcenv.unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
#include "gcenv.structs.h"
#include "gcenv.base.h"
#include "gcenv.os.h"
#include "gcenv.ee.h"
#include "gcenv.unix.inl"
#include "volatile.h"
#include "gcconfig.h"
#include "numasupport.h"
#include <minipal/thread.h>

Expand Down Expand Up @@ -852,10 +854,10 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
return result;
}

static size_t GetLogicalProcessorCacheSizeFromOS()
static void GetLogicalProcessorCacheSizeFromSysConf(size_t* cacheLevel, size_t* cacheSize)
{
size_t cacheLevel = 0;
size_t cacheSize = 0;
assert (cacheLevel != nullptr);
assert (cacheSize != nullptr);

#if defined(_SC_LEVEL1_DCACHE_SIZE) || defined(_SC_LEVEL2_CACHE_SIZE) || defined(_SC_LEVEL3_CACHE_SIZE) || defined(_SC_LEVEL4_CACHE_SIZE)
const int cacheLevelNames[] =
Expand All @@ -871,47 +873,105 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
long size = sysconf(cacheLevelNames[i]);
if (size > 0)
{
cacheSize = (size_t)size;
cacheLevel = i + 1;
*cacheSize = (size_t)size;
*cacheLevel = i + 1;
break;
}
}
#endif
}

static void GetLogicalProcessorCacheSizeFromSysFs(size_t* cacheLevel, size_t* cacheSize)
{
assert (cacheLevel != nullptr);
assert (cacheSize != nullptr);

#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
if (cacheSize == 0)
//
// Retrieve the cache size via sysfs by reading the file /sys/devices/system/cpu/cpu0/cache/index{LastLevelCache}/size
// for the platform. Currently musl and arm64 should be the only cases that use
// this method to determine the cache size.
//
size_t level;
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
int index = 40;
assert(path_to_size_file[index] == '-');
assert(path_to_level_file[index] == '-');

for (int i = 0; i < 5; i++)
{
//
// Fallback to retrieve cachesize via /sys/.. if sysconf was not available
// for the platform. Currently musl and arm64 should be only cases to use
// this method to determine cache size.
//
size_t level;
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
int index = 40;
assert(path_to_size_file[index] == '-');
assert(path_to_level_file[index] == '-');

for (int i = 0; i < 5; i++)
{
path_to_size_file[index] = (char)(48 + i);
path_to_size_file[index] = (char)(48 + i);

uint64_t cache_size_from_sys_file = 0;
uint64_t cache_size_from_sys_file = 0;

if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
{
cacheSize = std::max(cacheSize, (size_t)cache_size_from_sys_file);
if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
{
*cacheSize = std::max(*cacheSize, (size_t)cache_size_from_sys_file);

path_to_level_file[index] = (char)(48 + i);
if (ReadMemoryValueFromFile(path_to_level_file, &level))
{
cacheLevel = level;
}
path_to_level_file[index] = (char)(48 + i);
if (ReadMemoryValueFromFile(path_to_level_file, &level))
{
*cacheLevel = level;
}
}
}
#endif
}

// Estimates a cache size from the CPU count when no measured value is available.
//
// Parameters:
//  cacheLevel - must not be null; this function never writes through it.
//  cacheSize  - must not be null; receives the estimated size in bytes.
//
// The estimate is only produced when TARGET_LINUX is defined and TARGET_APPLE
// is not; in every other build configuration the function leaves both outputs
// untouched.
static void GetLogicalProcessorCacheSizeFromHeuristic(size_t* cacheLevel, size_t* cacheSize)
{
    assert (cacheLevel != nullptr);
    assert (cacheSize != nullptr);

#if (defined(TARGET_LINUX) && !defined(TARGET_APPLE))
    // Size buckets, keyed on the count reported by g_processAffinitySet:
    //   65 or more : 32 MB
    //   17 to 64   : 16 MB
    //    5 to 16   :  8 MB
    //    1 to 4    :  4 MB
    const DWORD cpuCount = g_processAffinitySet.Count();

    size_t sizeInMegabytes;
    if (cpuCount >= 65)
    {
        sizeInMegabytes = 32;
    }
    else if (cpuCount >= 17)
    {
        sizeInMegabytes = 16;
    }
    else if (cpuCount >= 5)
    {
        sizeInMegabytes = 8;
    }
    else
    {
        sizeInMegabytes = 4;
    }

    // Convert megabytes to bytes with a single store to the output.
    *cacheSize = sizeInMegabytes * (1024 * 1024);
#endif
}

static size_t GetLogicalProcessorCacheSizeFromOS()
{
size_t cacheLevel = 0;
size_t cacheSize = 0;

if (GCConfig::GetGCCacheSizeFromSysConf())
{
GetLogicalProcessorCacheSizeFromSysConf(&cacheLevel, &cacheSize);
}

if (cacheSize == 0)
{
GetLogicalProcessorCacheSizeFromSysFs(&cacheLevel, &cacheSize);
if (cacheSize == 0)
{
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
}
}

#if HAVE_SYSCTLBYNAME
if (cacheSize == 0)
Expand All @@ -938,32 +998,7 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
if (cacheLevel != 3)
{
// We expect to get the L3 cache size for Arm64 but currently expected to be missing that info
// from most of the machines.
// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
DWORD logicalCPUs = g_processAffinitySet.Count();
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize *= (1024 * 1024);
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
}
#endif

Expand Down

0 comments on commit 4f69316

Please sign in to comment.