Skip to content

Fix FP8 checkpoint resumption with onnx export flag #4594

Fix FP8 checkpoint resumption with onnx export flag

Fix FP8 checkpoint resumption with onnx export flag #4594

Workflow file for this run

name: PR GPU tests
on:
  # pull_request_target runs in the context of the base repository (with access
  # to its secrets), not the PR head.
  # NOTE(review): confirm the called workflow treats PR code as untrusted.
  pull_request_target:
  workflow_dispatch:
# Cancel superseded runs of the same PR. For pull_request_target events,
# github.ref is the PR's *base* branch (typically main or dev), so a ref-only
# check would never cancel stale PR runs; PR events are therefore always
# cancellable. Manually dispatched runs on main or dev are never cancelled.
concurrency:
  # One group per workflow and PR number; falls back to the ref when there is
  # no PR in the event payload (e.g. workflow_dispatch).
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request_target' || (github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev') }}
jobs:
  # Delegates to the reusable pytest-gpu workflow, once per matrix entry.
  pytest-gpu:
    uses: ./.github/workflows/pytest-gpu.yaml
    strategy:
      matrix:
        include:
          - name: 'gpu-3.10-2.1'
            container: mosaicml/pytorch:2.1.0_cu121-python3.10-ubuntu20.04
            markers: 'not daily and not remote and gpu and (doctest or not doctest)'
            pytest_command: 'coverage run -m pytest'
            composer_package_name: 'mosaicml'
          # Disabled matrix entry for the PyTorch 2.2 nightly image.
          # - name: 'gpu-3.10-2.2'
          #   container: mosaicml/pytorch:2.2.0_cu121-nightly20231213-python3.10-ubuntu20.04
          #   markers: 'not daily and not remote and gpu and (doctest or not doctest)'
          #   pytest_command: 'coverage run -m pytest'
          #   composer_package_name: 'mosaicml'
    name: ${{ matrix.name }}
    # Only run when the repository is owned by the mosaicml organization.
    if: github.repository_owner == 'mosaicml'
    with:
      composer_package_name: ${{ matrix.composer_package_name }}
      container: ${{ matrix.container }}
      mcloud-timeout: 1500
      name: ${{ matrix.name }}
      pytest-command: ${{ matrix.pytest_command }}
      pytest-markers: ${{ matrix.markers }}
      # Quoted so YAML keeps the version as a string rather than a float
      # (an unquoted 3.10 would be read as 3.1).
      # NOTE(review): assumes the called workflow declares this input as a
      # string - confirm in pytest-gpu.yaml.
      python-version: '3.9'
    secrets:
      mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}