Replace cvt instructions with bitwise operations in s8->bf16 conversions #7

Workflow file for this run

.github/workflows/integration-tests.yml at f09c4d6

	name: Integration Tests

	on:
	workflow_dispatch:
	pull_request:
	# You can name your branch dev-foo to get CI runs.
	branches: [main, 'dev-**']
	merge_group:
	branches: [main, 'dev-**']
	types: [checks_requested]
	push:
	branches: [main]

	concurrency:
	group: ${{ github.ref }}
	cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}

	permissions: read-all

	env:
	TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
	TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
	TRITON_DISABLE_LINE_INFO: 1

	jobs:
	Runner-Preparation:
	runs-on: ubuntu-latest
	timeout-minutes: 30
	outputs:
	matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
	matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
	steps:
	- name: Decide pre-submit integration test enablement
	# Always enable integration tests for pre-submit pull requests.
	if: github.event_name == 'pull_request'
	run: \|
	echo "enable_integration=true" >> $GITHUB_ENV

	- name: Checkout post-submit commits
	if: github.event_name == 'push'
	uses: actions/checkout@v4
	with:
	# Only fetch two commits to check the latest changed files.
	fetch-depth: 2

	- name: Detect post-submit changed files
	id: detect-change
	if: github.event_name == 'push'
	uses: tj-actions/changed-files@v44
	with:
	# Monitor hash or version files in the cmake/ directory for changes.
	files: \|
	cmake/*.txt

	- name: Decide post-submit integration test enablement
	# Only enable integration tests for post-submit when build dependency changes.
	if: github.event_name == 'push' && steps.detect-change.outputs.any_changed == 'true'
	run: \|
	echo "enable_integration=true" >> $GITHUB_ENV

	- name: Prepare runner matrix
	id: set-matrix
	if: env.enable_integration == 'true'
	run: \|
	if [ x"${{ github.repository }}" == x"openai/triton" ]; then
	echo '::set-output name=matrix-CUDA::[["self-hosted", "A100"], ["self-hosted", "H100"]]'
	echo '::set-output name=matrix-HIP::[["self-hosted", "gfx90a"]]'
	else
	echo '::set-output name=matrix-CUDA::["ubuntu-latest"]'
	echo '::set-output name=matrix-HIP::["ubuntu-latest"]'
	fi

	pre-commit:
	name: pre-commit (code formatting)
	if: github.event_name == 'pull_request'
	needs: Runner-Preparation
	runs-on: ubuntu-latest

	steps:
	- name: Checkout
	uses: actions/checkout@v4

	- uses: actions/setup-python@v5
	with:
	python-version: '3.12'
	cache: 'pip'

	- name: Compute hash of pre-commit config
	id: cache-key
	run: \|
	echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml)" >> $GITHUB_OUTPUT
	shell: bash

	- name: Cache pre-commit's cache dir
	uses: actions/cache@v4
	with:
	# Note that we cannot use environment variables here given there is
	# no shell to interpret them in the paths.
	path: \|
	~/.cache/pre-commit
	key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}

	- name: Check pre-commit
	run: \|
	python3 -m pip install --upgrade pre-commit
	# TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
	python3 -m pre_commit run --all-files --verbose yapf &> /dev/null \|\| true
	# If first run of yapf worked and made changes reset the tree to the original state
	git reset --hard
	python3 -m pre_commit run --all-files --verbose

	Integration-Tests:
	needs: Runner-Preparation

	runs-on: ${{ matrix.runner }}
	timeout-minutes: 30

	strategy:
	matrix:
	runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}

	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	submodules: "true"

	- name: Compute build dependency cache keys
	id: cache-key
	run: \|
	echo "llvm=$(cat cmake/llvm-hash.txt \| cut -c 1-8)" >> $GITHUB_OUTPUT
	echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
	echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
	shell: bash

	- name: Cache build dependency
	uses: actions/cache@v4
	with:
	# Note that we cannot use environment variables here given there is
	# no shell to interpret them in the paths.
	path: \|
	~/.triton/llvm
	~/.triton/nvidia
	~/.triton/pybind11
	key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}

	- name: Inspect cache directory
	run: \|
	mkdir -p ~/.triton
	ls -alh ~/.triton

	- name: Update PATH
	run: \|
	echo "$HOME/.local/bin" >> $GITHUB_PATH

	- name: Install apt dependencies
	run: \|
	sudo apt-get update -y
	sudo apt-get install -y ccache clang lld

	- name: Install pip dependencies
	run: \|
	python3 -m pip install --upgrade pip
	python3 -m pip install wheel cmake==3.24 ninja pytest-xdist lit

	- name: Install Triton
	run: \|
	echo "PATH is '$PATH'"
	cd python
	TRITON_BUILD_WITH_CCACHE=true python3 -m pip install --no-build-isolation -vvv '.[tests]'

	- name: Run lit tests
	run: \|
	cd python
	LIT_TEST_DIR="build/$(ls build \| grep -i cmake)/test"
	if [ ! -d "${LIT_TEST_DIR}" ]; then
	echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
	fi
	lit -v "${LIT_TEST_DIR}"

	- name: Run python tests on CUDA
	run: \|
	cd python/test/unit
	python3 -m pytest -vvv -n 8 --ignore=hopper/test_flashattention.py --ignore=runtime --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
	python3 -m pytest -vvv -n 8 language/test_subprocess.py
	# Run runtime tests serially to avoid race condition with cache handling
	python3 -m pytest -vvv runtime/
	# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
	TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv language/test_line_info.py
	# Run hopper/test_flashattention.py separately to avoid out of gpu memory
	python3 -m pytest -vs hopper/test_flashattention.py

	- name: Run interpreter tests
	if: ${{matrix.runner[0] == 'self-hosted' && matrix.runner[1] == 'H100'}}
	env:
	TRITON_INTERPRET: "1"
	run: \|
	cd python/test/unit
	python3 -m pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
	language/test_random.py language/test_block_pointer.py operators/test_flash_attention.py::test_op \
	--device cpu

	- name: Run C++ unittests
	run: \|
	cd python
	cd "build/$(ls build \| grep -i cmake)"
	ctest

	- name: Inspect cache directory
	run: \|
	mkdir -p ~/.triton
	ls -alh ~/.triton
	du -sh ~/.triton/**


	Integration-Tests-AMD:
	needs: Runner-Preparation

	runs-on: ${{ matrix.runner }}
	timeout-minutes: 30

	strategy:
	matrix:
	runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}

	container:
	image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
	options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root

	steps:
	- name: Checkout
	uses: actions/checkout@v4
	with:
	submodules: 'true'

	- name: Compute build dependency cache keys
	id: cache-key
	run: \|
	echo "llvm=$(cat cmake/llvm-hash.txt \| cut -c 1-8)" >> $GITHUB_OUTPUT
	echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
	echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
	shell: bash

	- name: Clear triton cache directory
	run: \|
	rm -rf ~/.triton
	mkdir -p ~/.triton
	ls -alh ~/.triton

	- name: Cache build dependency
	uses: actions/cache@v4
	with:
	# Note that we cannot use environment variables here given there is
	# no shell to interpret them in the paths.
	path: \|
	~/.triton/llvm
	~/.triton/nvidia
	~/.triton/pybind11
	key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}

	- name: Inspect cache directory
	run: \|
	mkdir -p ~/.triton
	ls -alh ~/.triton

	- name: Update PATH
	run: \|
	echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH

	- name: Install pip dependencies
	run: \|
	python3 -m pip install lit

	- name: Install Triton
	run: \|
	echo "PATH is '$PATH'"
	pip uninstall -y triton
	cd python
	pip install -v -e .

	- name: Run lit tests
	run: \|
	cd python
	LIT_TEST_DIR="build/$(ls build \| grep -i cmake)/test"
	if [ ! -d "${LIT_TEST_DIR}" ]; then
	echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1
	fi
	lit -v "${LIT_TEST_DIR}"

	- name: Run python tests on HIP
	run: \|
	cd python/test/unit
	pytest --capture=tee-sys -rfs -vvv -n 32 language operators \
	--ignore=language/test_reproducer.py \
	--ignore=language/test_conversions.py \
	--ignore=language/test_line_info.py \
	--ignore=operators/test_blocksparse.py
	# Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
	TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv -n 8 language/test_line_info.py

	# Run runtime tests serially to avoid race condition with cache handling
	python3 -m pytest -vvv runtime
	# Run hopper tests
	python3 -m pytest -vs hopper/test_mixed_io.py \
	hopper/test_tma_store_gemm.py

	- name: Run C++ unittests
	run: \|
	cd python
	cd "build/$(ls build \| grep -i cmake)"
	ctest

	- name: Inspect cache directory
	run: \|
	mkdir -p ~/.triton
	ls -alh ~/.triton
	du -sh ~/.triton/**

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Replace cvt instructions with bitwise operations in s8->bf16 conversions #7

Workflow file

Replace cvt instructions with bitwise operations in s8->bf16 conversions #7

Jobs

Run details

Workflow file for this run