Skip to content

Commit

Permalink
chore(ci): refacto gpu bench workflows to reduce duplicates
Browse files Browse the repository at this point in the history
Now there is only one entry point to trigger benchmarks manually.
This entry point uses a sub-workflow responsible for provisioning
and running the benchmarks.
A weekly workflow is also created with all the targets needed.

This also adds the possibility to run throughput benchmarks
on-demand.
  • Loading branch information
soonum committed Nov 21, 2024
1 parent 5c226e9 commit 9da58f6
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 1,314 deletions.
271 changes: 72 additions & 199 deletions .github/workflows/benchmark_gpu_integer.yml
Original file line number Diff line number Diff line change
@@ -1,213 +1,86 @@
# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
name: Integer GPU benchmarks
# Run CUDA benchmarks on a Hyperstack VM and return parsed results to Slab CI bot.
name: Cuda benchmarks

on:
workflow_dispatch:
inputs:
run_throughput:
description: "Run throughput benchmarks"
profile:
description: "Instance type"
required: true
type: choice
options:
- "l40 (n3-L40x1)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
command:
description: "Benchmark command to run"
type: choice
default: integer_multi_bit
options:
- integer
- integer_multi_bit
- integer_compression
- pbs
- ks
op_flavor:
description: "Operations set to run"
type: choice
default: default
options:
- default
- fast_default
- unchecked
all_precisions:
description: "Run all precisions"
type: boolean
default: false

push:
branches:
- main

env:
CARGO_TERM_COLOR: always
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
BENCH_TYPE: latency
bench_type:
description: "Benchmarks type"
type: choice
default: latency
options:
- latency
- throughput
- both

jobs:
setup-instance:
name: Setup instance (cuda-integer-benchmarks)
parse-inputs:
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' ||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
profile: ${{ steps.parse_profile.outputs.profile }}
hardware_name: ${{ steps.parse_hardware_name.outputs.name }}
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: single-h100

cuda-integer-benchmarks:
name: Execute GPU integer benchmarks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}

- name: Get benchmark details
run: |
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
- name: Set up home
# "Install rust" step require root user to have a HOME directory which is not set.
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
with:
toolchain: nightly

- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi

- name: Should run throughput benchmarks
if: inputs.run_throughput
- name: Parse profile
id: parse_profile
run: |
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
echo "profile=$(echo \"${{ inputs.profile }}\" | sed 's|\(.*\)[[:space:]](.*)|\1|')" >> "${GITHUB_OUTPUT}"
- name: Run benchmarks with AVX512
- name: Parse hardware name
id: parse_hardware_name
run: |
make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu
- name: Parse benchmarks to csv
run: |
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
parse_integer_benches
- name: Upload csv results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_csv_integer
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}

- name: Parse results
run: |
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
--database tfhe_rs \
--hardware "n3-H100x1" \
--backend gpu \
--project-version "${{ env.COMMIT_HASH }}" \
--branch ${{ github.ref_name }} \
--commit-date "${{ env.COMMIT_DATE }}" \
--bench-date "${{ env.BENCH_DATE }}" \
--walk-subdirs \
--name-suffix avx512 \
--bench-type ${{ env.BENCH_TYPE }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_integer
path: ${{ env.RESULTS_FILENAME }}

- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}

- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
slack-notify:
name: Slack Notification
needs: [ setup-instance, cuda-integer-benchmarks ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-integer-benchmarks.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Send message
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-integer-benchmarks.result }}
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-integer-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

teardown-instance:
name: Teardown instance (cuda-integer-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
needs: [ setup-instance, cuda-integer-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}

- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
echo "name=$(echo \"${{ inputs.profile }}\" | sed 's|.*[[:space:]](\(.*\))|\1|')" >> "${GITHUB_OUTPUT}"
run-benchmarks:
name: Run benchmarks
needs: parse-inputs
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
with:
profile: ${{ needs.parse-inputs.outputs.profile }}
hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }}
command: ${{ inputs.command }}
op_flavor: ${{ inputs.op_flavor }}
bench_type: ${{ inputs.bench_type }}
all_precisions: ${{ inputs.all_precisions }}
secrets:
FHE_ACTIONS_TOKEN: ${{ secrets.FHE_ACTIONS_TOKEN }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTIONS_TOKEN }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
SLAB_URL: ${{ secrets.SLAB_URL }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
Loading

0 comments on commit 9da58f6

Please sign in to comment.