Skip to content

self-hosted-gpu-test #96

self-hosted-gpu-test

self-hosted-gpu-test #96

name: self-hosted-gpu-test
on:
push:
branches:
- master
- main
workflow_dispatch:
schedule:
# nightly tests
- cron: "0 0 * * *"
jobs:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}
- name: Start EC2 runner
id: start-ec2-runner
uses: machulav/ec2-github-runner@main
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ami-04d16a12bbc76ff0b
ec2-instance-type: g4dn.xlarge
subnet-id: subnet-0dee8543e12afe0cd # us-east-1a
security-group-id: sg-0f9809618550edb98
# iam-role-name: self-hosted-runner # optional, requires additional permissions
aws-resource-tags: > # optional, requires additional permissions
[
{"Key": "Name", "Value": "ec2-github-runner"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
]
do-the-job:
name: Do the job on the runner
needs: start-runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
TEST_MODE: GPU
OPENMM: ${{ matrix.cfg.openmm }}
OE_LICENSE: ${{ github.workspace }}/oe_license.txt
HOME: /home/ec2-user
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
defaults:
run:
shell: bash -l {0}
steps:
- uses: actions/checkout@v3
- uses: conda-incubator/setup-miniconda@v2
with:
installer-url: https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh
python-version: "3.10"
activate-environment: test
channels: conda-forge,defaults
environment-file: devtools/conda-envs/test_env.yaml
auto-activate-base: false
auto-update-conda: true
show-channel-urls: true
- name: Refine test env
shell: bash -l {0}
run: |
mamba install -y cudatoolkit==11.7 openmm>=8.0
- name: Additional info about the build
shell: bash -l {0}
run: |
uname -a
df -h
ulimit -a
conda info -a
conda list
python -c "import openmm; print(openmm.Platform.getPluginLoadFailures())"
python -m openmm.testInstallation
- name: Install package
shell: bash -l {0}
run: |
python -m pip install --no-deps -v .
- name: Environment Information
shell: bash -l {0}
run: |
conda info -a
conda list
- name: Test the package
shell: bash -l {0}
run: |
pytest -v --cov-report xml --durations=0 --cov=openmmtools openmmtools/tests
- name: Codecov
if: ${{ github.repository == 'choderalab/openmmtools'
&& github.event != 'schedule' }}
uses: codecov/codecov-action@v1
with:
file: ./coverage.xml
name: codecov-${{ matrix.cfg.os }}-py${{ matrix.cfg.python-version }}
flags: unittests
fail_ci_if_error: false
stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner # required to get output from the start-runner job
- do-the-job # required to wait when the main job is done
runs-on: ubuntu-20.04
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_REGION }}
- name: Stop EC2 runner
uses: machulav/ec2-github-runner@main
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}