Skip to content

Add multi-node-training #337

Add multi-node-training

Add multi-node-training #337

# CI/CD workflow for running the test suite with pytest (pip install, GPU variant).
# It first installs torch (CUDA 12.1 build) manually with pip, then installs the
# package including its dev dependencies, and finally runs the tests.
name: test (pip install, gpu)
on: [push, pull_request]
jobs:
  tests:
    # The runner label carries the workflow run id as a suffix, so it is
    # unique per workflow run.
    runs-on: "cirun-aws-runner--${{ github.run_id }}"
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Set up Python 3.9
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is always read as a string, never a float.
          python-version: "3.9"
      # torch is installed on its own first, from the CUDA 12.1 wheel index,
      # before the package itself is installed.
      - name: Install torch (GPU CUDA 12.1)
        run: |
          python -m pip install torch --index-url https://download.pytorch.org/whl/cu121
      - name: Install package (including dev dependencies)
        run: |
          python -m pip install ".[dev]"
      # Fails the job if the installed torch version string ends in '+cpu'.
      - name: Print and check torch version
        run: |
          python -c "import torch; print(torch.__version__)"
          python -c "import torch; assert not torch.__version__.endswith('+cpu')"
      # Restore the example data archive from the cache, if an entry exists.
      - name: Load cache data
        uses: actions/cache/restore@v4
        with:
          path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip
          key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0
          restore-keys: |
            ${{ runner.os }}-meps-reduced-example-data-v0.2.0
      - name: Run tests
        run: |
          python -m pytest -vv -s
      # Save the example data archive under the same key used for restoring.
      - name: Save cache data
        uses: actions/cache/save@v4
        with:
          path: tests/datastore_examples/npyfilesmeps/meps_example_reduced.zip
          key: ${{ runner.os }}-meps-reduced-example-data-v0.2.0