Skip to content

Commit

Permalink
chore(ci): add workflow for erc20 benchmarks on gpu
Browse files Browse the repository at this point in the history
  • Loading branch information
soonum committed Oct 10, 2024
1 parent 0ec1a0d commit 8db1848
Show file tree
Hide file tree
Showing 3 changed files with 219 additions and 3 deletions.
195 changes: 195 additions & 0 deletions .github/workflows/benchmark_gpu_erc20.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Run ERC20 benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
name: ERC20 GPU H100 benchmarks

on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 5a.m.
- cron: '0 5 * * 6'

env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

jobs:
setup-instance:
name: Setup instance (cuda-erc20-benchmarks)
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@c0e7168795bd78f61f61146951ed9d0c73c9b701
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: single-h100

cuda-erc20-benchmarks:
name: Execute GPU integer benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}

- name: Get benchmark details
run: |
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
- name: Set up home
# "Install rust" step require root user to have a HOME directory which is not set.
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
with:
toolchain: nightly

- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi

- name: Run benchmarks
run: |
make bench_hlapi_erc20_gpu
- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
--database tfhe_rs \
--hardware "n3-H100x1" \
--backend gpu \
--project-version "${{ env.COMMIT_HASH }}" \
--branch ${{ github.ref_name }} \
--commit-date "${{ env.COMMIT_DATE }}" \
--bench-date "${{ env.BENCH_DATE }}" \
--walk-subdirs \
--name-suffix avx512
- name: Parse PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe/erc20_pbs_count.csv ${{ env.RESULTS_FILENAME }} \
--key-sizes \
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@50769540e7f4bd5e21e526ee35c689e35e0d6874
with:
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}

- name: Checkout Slab repo
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}

- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
slack-notify:
name: Slack Notification
needs: [ setup-instance, cuda-erc20-benchmarks ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-erc20-benchmarks.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Send message
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_COLOR: ${{ needs.cuda-erc20-benchmarks.result }}
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-erc20-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

teardown-instance:
name: Teardown instance (cuda-erc20-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@c0e7168795bd78f61f61146951ed9d0c73c9b701
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}

- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-erc20-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1155,6 +1155,12 @@ bench_hlapi_erc20: install_rs_check_toolchain
--bench hlapi-erc20 \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --

.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ECR20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --

#
# Utility tools
#
Expand Down
21 changes: 18 additions & 3 deletions tfhe/benches/high_level_api/erc20.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,12 @@ mod pbs_stats {
println!("ERC20 transfer/{fn_name}::{type_name}: {count} PBS");

let params = client_key.computation_parameters();
let test_name = format!("hlapi::erc20::pbs_count::{fn_name}::{type_name}");

let test_name = if cfg!(feature = "gpu") {
format!("hlapi::cuda::erc20::pbs_count::{fn_name}::{type_name}")
} else {
format!("hlapi::erc20::pbs_count::{fn_name}::{type_name}")
};

let results_file = Path::new("erc20_pbs_count.csv");
if !results_file.exists() {
Expand Down Expand Up @@ -301,7 +306,12 @@ fn main() {

// FheUint64 latency
{
let bench_name = "hlapi::erc20::transfer_latency";
let bench_name = if cfg!(feature = "gpu") {
"hlapi::cuda::erc20::transfer_latency"
} else {
"hlapi::erc20::transfer_latency"
};

let mut group = c.benchmark_group(bench_name);
bench_transfer_latency(
&mut group,
Expand Down Expand Up @@ -341,7 +351,12 @@ fn main() {

// FheUint64 Throughput
{
let bench_name = "hlapi::erc20::transfer_throughput";
let bench_name = if cfg!(feature = "gpu") {
"hlapi::cuda::erc20::transfer_throughput"
} else {
"hlapi::erc20::transfer_throughput"
};

let mut group = c.benchmark_group(bench_name);
bench_transfer_throughput(
&mut group,
Expand Down

0 comments on commit 8db1848

Please sign in to comment.