Skip to content

Commit

Permalink
Remove EC2 jobs
Browse files Browse the repository at this point in the history
We also remove driver reinstallation step
  • Loading branch information
glegendre01 authored and dacorvo committed Nov 13, 2023
1 parent c29def2 commit 3d8e2e6
Showing 1 changed file with 1 addition and 75 deletions.
76 changes: 1 addition & 75 deletions .github/workflows/test_inf1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,65 +15,14 @@ concurrency:
cancel-in-progress: true

jobs:
start-runner:
name: Start self-hosted EC2 runner
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
EC2_AMI_ID: ${{ vars.INFERENTIA_AMI_ID }}
EC2_INSTANCE_TYPE: inf1.6xlarge
EC2_SUBNET_ID: subnet-859322b4,subnet-b7533b96,subnet-47cfad21,subnet-a396b2ad,subnet-06576a4b,subnet-df0f6180
EC2_SECURITY_GROUP: sg-0bb210cd3ec725a13
EC2_IAM_ROLE: optimum-ec2-github-actions-role
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Start EC2 runner
id: start-ec2-runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: start
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
ec2-image-id: ${{ env.EC2_AMI_ID }}
ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
subnet-id: ${{ env.EC2_SUBNET_ID }}
security-group-id: ${{ env.EC2_SECURITY_GROUP }}
iam-role-name: ${{ env.EC2_IAM_ROLE }}
aws-resource-tags: > # optional, requires additional permissions
[
{"Key": "Name", "Value": "ec2-optimum-github-runner"},
{"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
]
do-the-job:
name: Run INF1 tests
needs: start-runner # required to start the main job when the runner is ready
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
env:
AWS_REGION: us-east-1
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Re-install neuron driver
run: |
sudo apt remove aws-neuronx-dkms -y
sudo apt remove aws-neuronx-tools -y
. /etc/os-release
sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install linux-headers-$(uname -r) -y
sudo apt-get install aws-neuronx-dkms=2.* -y
sudo apt-get install aws-neuronx-tools=2.* -y
export PATH=/opt/aws/neuron/bin:$PATH
- name: Install python dependencies
run: |
sudo apt install python3.8-venv -y
Expand All @@ -88,26 +37,3 @@ jobs:
run: |
source aws_neuron_venv_pytorch/bin/activate
HF_TOKEN_OPTIMUM_NEURON_CI=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests
stop-runner:
name: Stop self-hosted EC2 runner
needs:
- start-runner # required to get output from the start-runner job
- do-the-job # required to wait when the main job is done
runs-on: ubuntu-latest
env:
AWS_REGION: us-east-1
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ env.AWS_REGION }}
- name: Stop EC2 runner
uses: philschmid/philschmid-ec2-github-runner@main
with:
mode: stop
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
label: ${{ needs.start-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}

0 comments on commit 3d8e2e6

Please sign in to comment.