Skip to content

Replace cvt instructions with bitwise operations in s8->bf16 conversions #7

Replace cvt instructions with bitwise operations in s8->bf16 conversions

Replace cvt instructions with bitwise operations in s8->bf16 conversions #7

name: Integration Tests
on:
workflow_dispatch:
pull_request:
# You can name your branch dev-foo to get CI runs.
branches: [main, 'dev-**']
merge_group:
branches: [main, 'dev-**']
types: [checks_requested]
push:
branches: [main]
concurrency:
group: ${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
permissions: read-all
env:
TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
TRITON_DISABLE_LINE_INFO: 1
jobs:
Runner-Preparation:
runs-on: ubuntu-latest
timeout-minutes: 30
outputs:
matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
steps:
- name: Decide pre-submit integration test enablement
# Always enable integration tests for pre-submit pull requests.
if: github.event_name == 'pull_request'
run: |
echo "enable_integration=true" >> $GITHUB_ENV
- name: Checkout post-submit commits
if: github.event_name == 'push'
uses: actions/checkout@v4
with:
# Only fetch two commits to check the latest changed files.
fetch-depth: 2
- name: Detect post-submit changed files
id: detect-change
if: github.event_name == 'push'
uses: tj-actions/changed-files@v44
with:
# Monitor hash or version files in the cmake/ directory for changes.
files: |
cmake/*.txt
- name: Decide post-submit integration test enablement
# Only enable integration tests for post-submit when build dependency changes.
if: github.event_name == 'push' && steps.detect-change.outputs.any_changed == 'true'
run: |
echo "enable_integration=true" >> $GITHUB_ENV
- name: Prepare runner matrix
id: set-matrix
if: env.enable_integration == 'true'
run: |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then
echo '::set-output name=matrix-CUDA::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
echo '::set-output name=matrix-HIP::[["self-hosted", "gfx90a"]]'
else
echo '::set-output name=matrix-CUDA::["ubuntu-latest"]'
echo '::set-output name=matrix-HIP::["ubuntu-latest"]'
fi
pre-commit:
name: pre-commit (code formatting)
if: github.event_name == 'pull_request'
needs: Runner-Preparation
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
- name: Compute hash of pre-commit config
id: cache-key
run: |
echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache pre-commit's cache dir
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.cache/pre-commit
key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
- name: Check pre-commit
run: |
python3 -m pip install --upgrade pre-commit
# TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
# If first run of yapf worked and made changes reset the tree to the original state
git reset --hard
python3 -m pre_commit run --all-files --verbose
Integration-Tests:
needs: Runner-Preparation
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: "true"
- name: Compute build dependency cache keys
id: cache-key
run: |
echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache build dependency
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.triton/llvm
~/.triton/nvidia
~/.triton/pybind11
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
- name: Update PATH
run: |
echo "$HOME/.local/bin" >> $GITHUB_PATH
- name: Install apt dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y ccache clang lld
- name: Install pip dependencies
run: |
python3 -m pip install --upgrade pip
python3 -m pip install wheel cmake==3.24 ninja pytest-xdist lit
- name: Install Triton
run: |
echo "PATH is '$PATH'"
cd python
TRITON_BUILD_WITH_CCACHE=true python3 -m pip install --no-build-isolation -vvv '.[tests]'
- name: Run lit tests
run: |
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Run python tests on CUDA
run: |
cd python/test/unit
python3 -m pytest -vvv -n 8 --ignore=hopper/test_flashattention.py --ignore=runtime --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
python3 -m pytest -vvv -n 8 language/test_subprocess.py
# Run runtime tests serially to avoid race condition with cache handling
python3 -m pytest -vvv runtime/
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv language/test_line_info.py
# Run hopper/test_flashattention.py separately to avoid out of gpu memory
python3 -m pytest -vs hopper/test_flashattention.py
- name: Run interpreter tests
if: ${{matrix.runner[0] == 'self-hosted' && matrix.runner[1] == 'H100'}}
env:
TRITON_INTERPRET: "1"
run: |
cd python/test/unit
python3 -m pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
language/test_random.py language/test_block_pointer.py operators/test_flash_attention.py::test_op \
--device cpu
- name: Run C++ unittests
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**
Integration-Tests-AMD:
needs: Runner-Preparation
runs-on: ${{ matrix.runner }}
timeout-minutes: 30
strategy:
matrix:
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
container:
image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
steps:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: 'true'
- name: Compute build dependency cache keys
id: cache-key
run: |
echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
shell: bash
- name: Clear triton cache directory
run: |
rm -rf ~/.triton
mkdir -p ~/.triton
ls -alh ~/.triton
- name: Cache build dependency
uses: actions/cache@v4
with:
# Note that we cannot use environment variables here given there is
# no shell to interpret them in the paths.
path: |
~/.triton/llvm
~/.triton/nvidia
~/.triton/pybind11
key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
- name: Update PATH
run: |
echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
- name: Install pip dependencies
run: |
python3 -m pip install lit
- name: Install Triton
run: |
echo "PATH is '$PATH'"
pip uninstall -y triton
cd python
pip install -v -e .
- name: Run lit tests
run: |
cd python
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
if [ ! -d "${LIT_TEST_DIR}" ]; then
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
fi
lit -v "${LIT_TEST_DIR}"
- name: Run python tests on HIP
run: |
cd python/test/unit
pytest --capture=tee-sys -rfs -vvv -n 32 language operators \
--ignore=language/test_reproducer.py \
--ignore=language/test_conversions.py \
--ignore=language/test_line_info.py \
--ignore=operators/test_blocksparse.py
# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv -n 8 language/test_line_info.py
# Run runtime tests serially to avoid race condition with cache handling
python3 -m pytest -vvv runtime
# Run hopper tests
python3 -m pytest -vs hopper/test_mixed_io.py \
hopper/test_tma_store_gemm.py
- name: Run C++ unittests
run: |
cd python
cd "build/$(ls build | grep -i cmake)"
ctest
- name: Inspect cache directory
run: |
mkdir -p ~/.triton
ls -alh ~/.triton
du -sh ~/.triton/**