Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ort CIs (slow, gpu, train) #2024

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
8 changes: 4 additions & 4 deletions .github/workflows/test_onnxruntime.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: ONNX Runtime / Python - Test

on:
push:
branches: [main]
branches:
- main
pull_request:
branches: [main]
branches:
- main

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
Expand Down
65 changes: 47 additions & 18 deletions .github/workflows/test_onnxruntime_gpu.yml
Original file line number Diff line number Diff line change
@@ -1,30 +1,59 @@
name: ONNX Runtime / Test GPU
name: ONNX Runtime GPU / Python - Test

on:
workflow_dispatch:
schedule:
- cron: 0 1 */3 * * # at 1am every 3 days
push:
branches:
- main
pull_request:
types: [opened, synchronize, reopened, labeled]
# uncomment to enable on PR merge on main branch:
#push:
# branches:
# - main
branches:
- main
types:
- opened
- reopened
- labeled
- unlabeled
- synchronize

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
do-the-job:
if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }}
name: Start self-hosted EC2 runner
build:
if: ${{
(github.event_name == 'push') ||
(github.event_name == 'workflow_dispatch') ||
contains( github.event.pull_request.labels.*.name, 'gpu') ||
contains(github.event.pull_request.labels.*.name, 'onnxruntime-gpu')
}}

runs-on:
group: aws-g6-4xlarge-plus
env:
AWS_REGION: us-east-1

container:
image: nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04
options: --gpus all

steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build image
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.8

- name: Install dependencies
run: |
pip install --upgrade pip
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
pip install .[tests,onnxruntime-gpu]

- name: Replace opencv-python with opencv-python-headless
run: |
docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu .
- name: Test with unittest within docker container
pip uninstall -y opencv-python && pip install opencv-python-headless

- name: Test with pytest
run: |
docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest
pytest tests/onnxruntime -m "cuda_ep_test" --durations=0 -vvvv -s -n auto
35 changes: 21 additions & 14 deletions .github/workflows/test_onnxruntime_slow.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name: ONNX Runtime slow / Python - Test
name: ONNX Runtime / Python - Slow Test

on:
workflow_dispatch:
schedule:
- cron: 0 7 * * * # every day at 7am
# every day at 7am
- cron: 0 7 * * *

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
Expand All @@ -18,16 +19,22 @@ jobs:
os: [ubuntu-20.04]

runs-on: ${{ matrix.os }}

steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies for export
run: |
pip install .[tests,onnxruntime]
- name: Test with unittest
working-directory: tests
run: |
RUN_SLOW=1 pytest onnxruntime -s -m "run_slow" --durations=0
- name: Checkout
uses: actions/checkout@v4

- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
pip install --upgrade pip
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install .[tests,onnxruntime]

- name: Test with pytest
run: |
RUN_SLOW=1 pytest tests/onnxruntime -m "run_slow" --durations=0 -s -vvvv -n auto
26 changes: 0 additions & 26 deletions .github/workflows/test_onnxruntime_train.yml

This file was deleted.

64 changes: 64 additions & 0 deletions .github/workflows/test_onnxruntime_training.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: ONNX Runtime Training / Python - Test

on:
workflow_dispatch:
push:
branches:
- main
pull_request:
branches:
- main
types:
- opened
- reopened
- labeled
- unlabeled
- synchronize

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
do-the-job:
if: ${{
(github.event_name == 'push') ||
(github.event_name == 'workflow_dispatch') ||
contains( github.event.pull_request.labels.*.name, 'training') ||
contains( github.event.pull_request.labels.*.name, 'onnxruntime-training')
}}

runs-on:
group: aws-g6-4xlarge-plus

container:
image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
options: --gpus all

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: 3.8

- name: Install dependencies
env:
TORCH_CUDA_ARCH_LIST: "5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
run: |
pip install --upgrade pip
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure
pip install --no-cache-dir evaluate absl-py rouge_score seqeval sacrebleu nltk scikit-learn
pip install .[tests,onnxruntime-gpu]

- name: Replace opencv-python with opencv-python-headless
run: |
pip uninstall -y opencv-python && pip install opencv-python-headless

- name: Test with pytest
run: |
RUN_SLOW=1 pytest tests/onnxruntime/training/nightly_test_trainer.py --durations=0 -s -vvvv
RUN_SLOW=1 pytest tests/onnxruntime/training/nightly_test_examples.py --durations=0 -s -vvvv
26 changes: 0 additions & 26 deletions tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu

This file was deleted.

83 changes: 0 additions & 83 deletions tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer

This file was deleted.

Loading
Loading