Skip to content

Commit

Permalink
add development for sapphire rapids
Browse files Browse the repository at this point in the history
  • Loading branch information
victoryang00 committed Nov 23, 2023
1 parent b5181aa commit d34018f
Show file tree
Hide file tree
Showing 62 changed files with 3,453 additions and 107,512 deletions.
19 changes: 9 additions & 10 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ name: CMake

on:
push:
branches: [ "main" ]
branches: [ "main", "asplos-dev" ]
pull_request:
branches: [ "main" ]
branches: [ "main", "asplos-dev" ]

env:
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
BUILD_TYPE: Release


jobs:
build:
# The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac.
Expand All @@ -21,20 +22,18 @@ jobs:
- uses: actions/checkout@v3

- name: Install dependencies
run: sudo apt install llvm-dev clang libbpf-dev libclang-dev libcxxopts-dev libfmt-dev librange-v3-dev

- name: Install Custom dependencies
run: wget http://launchpadlibrarian.net/605552811/libbpf0_0.8.0-1_amd64.deb && wget http://launchpadlibrarian.net/605552807/libbpf-dev_0.8.0-1_amd64.deb && sudo dpkg -i ./libbpf0_0.8.0-1_amd64.deb && sudo dpkg -i ./libbpf-dev_0.8.0-1_amd64.deb
run: sudo apt install llvm-dev clang libbpf-dev libclang-dev python3-pip gcc-13 g++-13 && git submodule update --init --recursive

- name: Sed Current uncompiled include file
run: sudo sed -i 's/NL_SET_ERR_MSG_MOD/\/\/NL_SET_ERR_MSG_MOD/g' /usr/src/linux-headers-`uname -r`/include/net/flow_offload.h
- name: Install conan
working-directory: ${{github.workspace}}
run: pip3 install conan && conan profile detect && mkdir build && cd build && cp ../conanfile.txt . && CC=gcc-13 CXX=g++-13 conan install . -s compiler.cppstd=gnu23

- name: Configure CMake
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
run: CC=gcc-13 CXX=g++-13 cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_TOOLCHAIN_FILE=${{github.workspace}}/build/build/${{env.BUILD_TYPE}}/generators/conan_toolchain.cmake -DCMAKE_POLICY_DEFAULT_CMP0091=NEW

- name: Build
# Build your program with the given configuration
run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
run: CC=gcc-13 CXX=g++-13 cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

9 changes: 8 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,11 @@ benchmarks*
Makefile
*.o
py_smdk_pkg
lib
lib
CMakePresets.json
.cmake
CMakeUserPresets.json
*~
voltdb
foo
CMakeFiles
15 changes: 15 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[submodule "workloads/memcached-ycsb"]
path = workloads/memcached-ycsb
url = https://github.com/SlugLab/YCSB/
[submodule "workloads/memcached"]
path = workloads/memcached
url = https://github.com/memcached/memcached
[submodule "workloads/gapbs"]
path = workloads/gapbs
url = https://github.com/victoryang00/gapbs
[submodule "script/perfmon"]
path = script/perfmon
url = https://github.com/intel/perfmon
[submodule "workloads/vectordb"]
path = workloads/vectordb
url = https://github.com/jina-ai/vectordb
39 changes: 20 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
cmake_minimum_required(VERSION 3.11.0)
project(CXL-MEM-Simulator VERSION 0.1.0)
project(CXLMemSim VERSION 0.1.0)
set(CMAKE_CXX_STANDARD 23)

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
add_compile_options (-fdiagnostics-color=always)
elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
add_compile_options (-fcolor-diagnostics)
endif ()

add_subdirectory(microbench)
add_subdirectory(workloads)

list(APPEND CMAKE_PREFIX_PATH ${CMAKE_BINARY_DIR})

find_package(cxxopts REQUIRED)
find_package(fmt REQUIRED)
find_package(range-v3 REQUIRED)
file(GLOB_RECURSE SOURCE_FILES src/*.cpp)

execute_process(COMMAND uname -r OUTPUT_VARIABLE arch OUTPUT_STRIP_TRAILING_WHITESPACE)
set(LINUX_SOURCE /lib/modules/${arch}/build/)
set(CMAKE_CXX_FLAGS "-Wall -g -pthread -lrt -rdynamic")
set(CMAKE_CXX_STANDARD 23)

add_executable(CXL-MEM-Simulator ${SOURCE_FILES})

include_directories(CXL-MEM-Simulator include)
target_link_libraries(CXL-MEM-Simulator cxxopts::cxxopts fmt::fmt range-v3::range-v3 elf bpf)
set(CMAKE_CXX_FLAGS "-Wall -fPIC -pthread -ldl -lrt -mavx512f -mpreferred-stack-boundary=4 -g")

function(bpf prefix)
add_custom_target(${prefix}_bpf ALL
COMMAND clang --target=bpf -nostdinc -S -I/usr/include/linux -I${CMAKE_SOURCE_DIR}/include -I${LINUX_SOURCE}/arch/x86/include -I/usr/include -I${LINUX_SOURCE}/arch/x86/include/uapi -I${LINUX_SOURCE}/arch/x86/include/generated -I${LINUX_SOURCE}/arch/x86/include/generated/uapi -I${LINUX_SOURCE}/include -I${LINUX_SOURCE}/include/uapi -I${LINUX_SOURCE}/include/generated/uapi -I${LINUX_SOURCE}/tools/testing/selftests/bpf -include ${LINUX_SOURCE}/include/linux/kconfig.h -D__KERNEL__ -D__ASM_SYSREG_H -D__BPF_TRACING__ -D__TARGET_ARCH_x86 -Wno-implicit-function-declaration -O3 -emit-llvm -g -c ${CMAKE_SOURCE_DIR}/src/${prefix}.c -o ${CMAKE_BINARY_DIR}/${prefix}.ll
COMMAND llc -march=bpf -filetype=obj -o ${CMAKE_BINARY_DIR}/${prefix}.o ${CMAKE_BINARY_DIR}/${prefix}.ll
)
add_dependencies(CXL-MEM-Simulator ${prefix}_bpf)
endfunction()
add_executable(CXLMemSim ${SOURCE_FILES} src/main.cc)

bpf(collectmmap)
include_directories(CXLMemSim include ${cxxopts_INCLUDE_DIR} ${fmt_INCLUDE_DIR})
target_link_libraries(CXLMemSim fmt::fmt cxxopts::cxxopts)

add_subdirectory(microbench)
add_library(CXLMemSimHook SHARED src/module.cc)
add_executable(CXLMemSimSock ${SOURCE_FILES} src/sock.cc)
target_link_libraries(CXLMemSimSock fmt::fmt cxxopts::cxxopts)
23 changes: 5 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
# CXL.mem Simulator
The epoch design of this project mostly refers to [mes](https://github.com/takahiro-hirofuchi/mesmeric-emulator); the novelty is the use of PEBS to construct the topology and calculate the hierarchy latency based on it. See the [talk](https://docs.google.com/file/d/1bZi2rbB-u5xMw_YET726gb2s9QuxMZJE/edit?usp=docslist_api&filetype=mspresentation).
The CXL.mem simulator uses the target latency to simulate, from the CPU's perspective, the penalty observed at the application level, taking the ROB and the different cacheline states into account.

## Prerequisite
```bash
$ uname -a
Linux gpu01 5.19.0-29-generic #30-Ubuntu SMP PREEMPT_DYNAMIC Wed Jan 4 12:14:09 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
$ sudo apt install llvm-dev clang libbpf-dev libclang-dev libcxxopts-dev libfmt-dev librange-v3-dev
Linux banana 6.4.0+ #86 SMP PREEMPT_DYNAMIC Fri Jul 28 23:49:33 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
$ echo 0 | sudo tee /sys/devices/system/node/node1/cpu*/online >/dev/null 2>&1
```
## User input
```bash
LOGV=1 ./CXL-MEM-Simulator -t ./microbench/many_calloc -i 5 -c 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
LOGV=1 ./CXL-MEM-Simulator -t ./microbench/ld -i 5 -c 0,2 -d 85 -b 10,10 -l 100,100 -c 100,100 -w 85.5,86.5,87.5,85.5,86.5,87.5,88. -o "(1,(2,3))"
```
1. -t Target: The path to the executable
2. -i Interval: The epoch of the simulator; the parameter is in milliseconds
3. -c CPUSet: The core id to run the executable and the rest will be `setaffinity` to one other core
4. -d Dram Latency: The current platform's DRAM latency, default is 85ns
4. -d Dram Latency: The current platform's DRAM latency, default is 85ns # mark that bw in the remote
5. -b, -l Bandwidth, Latency: Both use 2 input in the vector, first for read, second for write
6. -c Capacity: The capacity of the memory; the first value is for local memory, and the remaining values follow the order of the input vector.
7. -w Weight: Use the heuristic to calculate the bandwidth
Expand All @@ -30,16 +30,3 @@ LOGV=1 ./CXL-MEM-Simulator -t ./microbench/many_calloc -i 5 -c 0,1,2,3,4,5,6,7,8
3
```
9. The `LOGV` environment variable sets the log level that you can see.
## Limitation
PEBS requires that no more than 5 `perf_open_event` be attached to a given PID, so I limit the bpf program to munmap (kprobe) and sbrk (kprobe/kretprobe); you can configure them. For multi-process applications, I need to first SIGSTOP the process and `send/recv` back the PID information. For client and server applications, I need to SIGSTOP/SIGCONT both the client and the server simultaneously, which is not implemented yet.

## Cite
```bash
@article{yangyarch23,
title={CXLMemSim: A pure software simulated CXL.mem for performance characterization},
author={Yiwei Yang, Pooneh Safayenikoo, Jiacheng Ma, Tanvir Ahmed Khan, Andrew Quinn},
journal={arXiv preprint arXiv:2303.06153},
booktitle={The fifth Young Architect Workshop (YArch'23)},
year={2023}
}
```
19 changes: 0 additions & 19 deletions artifact/build_and_run_all.sh

This file was deleted.

10 changes: 0 additions & 10 deletions artifact/compare_with_gem5.sh

This file was deleted.

14 changes: 14 additions & 0 deletions artifact/gen_workloads.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Clone and build the gapbs workload, then download the 4i4f.pdb file.
#
# Workloads noted by the original author (presumably a list of candidates;
# only gapbs and the 4i4f.pdb download are actually handled below):
# anns
# monetdb
# pointer_chasing lmbench3
# wrf stream
# mlc
# gromacs
# smdk's

# A failed clone is tolerated on purpose when a previous checkout already
# exists: git prints an error and the existing directory is reused below.
git clone https://github.com/scott-beamer/gapbs.git

# Abort if the checkout is missing, so that `make` and `wget` never run in
# whatever directory the script happened to be started from.
cd gapbs || { echo "gen_workloads.sh: cannot enter ./gapbs (clone failed?)" >&2; exit 1; }
make benchmark

# Downloaded into the gapbs directory, because the script is still inside it.
# NOTE(review): presumably an input for the gromacs workload listed above -- confirm.
wget https://files.rcsb.org/download/4i4f.pdb
File renamed without changes.
55 changes: 55 additions & 0 deletions artifact/mlc-sapphirerapids.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
Intel(R) Memory Latency Checker - v3.10
*** Unable to modify prefetchers (try executing 'modprobe msr')
*** So, enabling random access for latency measurements
Measuring idle latencies for random access (in ns)...
Numa node Numa node
Numa node 0 1
0 106.3 437.5

Measuring Peak Injection Memory Bandwidths for the system
Bandwidths are in MB/sec (1 MB/sec = 1,000,000 Bytes/sec)
Using all the threads from each core if Hyper-threading is enabled
Using traffic with the following read-write ratios
ALL Reads : 28611.9
3:1 Reads-Writes : 25057.1
2:1 Reads-Writes : 25078.0
1:1 Reads-Writes : 23965.9
Stream-triad like: 24943.3

Measuring Memory Bandwidths between nodes within system
Bandwidths are in MB/sec (1 MB/sec = 1,000,000 Bytes/sec)
Using all the threads from each core if Hyper-threading is enabled
Using Read-only traffic type
Numa node Numa node
Numa node 0 1
0 28612.0 19216.8

Measuring Loaded Latencies for the system
Using all the threads from each core if Hyper-threading is enabled
Using Read-only traffic type
Inject Latency Bandwidth
Delay (ns) MB/sec
==========================
00000 370.12 28393.2
00002 369.29 28435.4
00008 378.41 28490.5
00015 354.32 28414.2
00050 313.07 28323.0
00100 238.51 28010.5
00200 125.13 14566.0
00300 119.69 10232.0
00400 116.76 7905.3
00500 115.33 6500.4
00700 113.89 4858.3
01000 113.03 3594.6
01300 112.57 2906.9
01700 112.09 2363.9
02500 111.51 1798.9
03500 111.21 1520.8
05000 110.77 1193.2
09000 110.38 922.3
20000 110.14 735.6

Measuring cache-to-cache transfer latency (in ns)...
Local Socket L2->L2 HIT latency 67.3
Local Socket L2->L2 HITM latency 67.5
9 changes: 9 additions & 0 deletions conanfile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
[requires]
cxxopts/3.0.0
fmt/9.0.0
nlohmann_json/3.11.2
[generators]
CMakeDeps
CMakeToolchain
[layout]
cmake_layout
12 changes: 0 additions & 12 deletions include/alloc.h

This file was deleted.

43 changes: 32 additions & 11 deletions include/cxlcontroller.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,64 @@
// Created by victoryang00 on 1/14/23.
//

#ifndef CXL_MEM_SIMULATOR_CXLCONTROLLER_H
#define CXL_MEM_SIMULATOR_CXLCONTROLLER_H
#ifndef CXLMEMSIM_CXLCONTROLLER_H
#define CXLMEMSIM_CXLCONTROLLER_H

#include "cxlcounter.h"
#include "cxlendpoint.h"
#include <cstdint>
#include <string_view>
#include <unordered_map>
#include <vector>

enum page_type { CACHELINE, PAGE, HUGEPAGE_2M, HUGEPAGE_1G };

class CXLController;
class Policy {
class AllocationPolicy {
public:
Policy();
AllocationPolicy();
virtual int compute_once(CXLController *) = 0;
// No write problem
};
// Abstract interface for a migration policy. Concrete policies derive from
// this class and implement compute_once().
class MigrationPolicy {
public:
MigrationPolicy();
// Pure virtual hook that receives the controller it operates on.
// NOTE(review): the meaning of the int return value is not visible in this
// header -- confirm against the implementations.
virtual int compute_once(CXLController *) = 0; // reader writer
// paging related
// switching related
};

// Abstract interface for a paging policy. Concrete policies derive from this
// class and implement compute_once().
// Author's note: need to give a timeout and will be added latency later,
class PagingPolicy {
public:
PagingPolicy();
// Pure virtual hook that receives the controller it operates on.
// NOTE(review): the meaning of the int return value is not visible in this
// header -- confirm against the implementations.
virtual int compute_once(CXLController *) = 0; // reader writer
// paging related
};

class CXLController : CXLSwitch {
public:
std::vector<CXLMemExpander *> cur_expanders{};
int capacity; // GB
Policy *policy;
AllocationPolicy *policy;
CXLCounter counter;
std::map<uint64_t, uint64_t> occupation;
std::map<uint64_t, uint64_t> va_pa_map;
bool is_page;
enum page_type page_type_; // percentage
int num_switches = 0;
CXLController(Policy *policy, int capacity, bool is_page, int epoch);

CXLController(AllocationPolicy *p, int capacity, enum page_type page_type_, int epoch);
void construct_topo(std::string_view newick_tree);
void insert_end_point(CXLMemExpander *end_point);
std::vector<std::string> tokenize(const std::string_view &s);
std::tuple<double,std::vector<uint64_t>> calculate_congestion() override;
std::tuple<double, std::vector<uint64_t>> calculate_congestion() override;
void set_epoch(int epoch) override;
std::tuple<int, int> get_all_access() override;
double calculate_latency(LatencyPass elem); // traverse the tree to calculate the latency
double calculate_bandwidth(BandwidthPass elem);
double calculate_latency(LatencyPass elem) override; // traverse the tree to calculate the latency
double calculate_bandwidth(BandwidthPass elem) override;
int insert(uint64_t timestamp, uint64_t phys_addr, uint64_t virt_addr, int index) override;
void delete_entry(uint64_t addr, uint64_t length) override;
std::string output() override;
};

#endif // CXL_MEM_SIMULATOR_CXLCONTROLLER_H
#endif // CXLMEMSIM_CXLCONTROLLER_H
Loading

0 comments on commit d34018f

Please sign in to comment.