# CUDA_Build1.yaml - GitHub Actions workflow definition.
# (GitHub web-page chrome and the 1-135 line-number gutter that were captured
# together with the file contents have been dropped; per the page header the
# file is 136 lines / 113 loc, 4.97 KB.)
# Workflow identity, triggers and token permissions.
name: CUDA Build1

# Only a manual run (workflow_dispatch) starts this workflow.
on:
  workflow_dispatch:

# Automatic triggers are currently disabled. To re-enable them, add the keys
# below under the `on:` mapping above:
#   push:
#     branches:
#       - main
#   pull_request:
#     branches:
#       - main

permissions:
  # Lets the job request an OIDC token. The azure/login step is given only
  # client/tenant/subscription ids (no client secret), which presumably means
  # it relies on this token - confirm before tightening.
  id-token: write
  contents: read
# Single job: log in to Azure, make sure the AKS cluster exists, build and push
# both images to ACR, then roll the Kubernetes manifests out to the cluster.
jobs:
  build:
    runs-on: ubuntu-latest

    services:
      # NOTE(review): no step in this job connects to this container or to port
      # 8080, and standard GitHub-hosted runners expose no GPU, so
      # `--privileged` / NVIDIA_VISIBLE_DEVICES most likely have no effect here.
      # Kept for backward compatibility - confirm whether it is still needed.
      nn_backend:
        image: nvidia/cuda:12.2.2-devel-ubuntu22.04
        ports:
          - "8080:8080"
        options: --privileged
        # NOTE(review): the flattened source does not show whether this `env`
        # belonged to the service or to the job; service level is assumed.
        env:
          NVIDIA_VISIBLE_DEVICES: all

    steps:
      # v2 ran on the retired Node 12 runtime; v4 is the maintained major.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Azure Login
        uses: azure/login@v2
        with:
          client-id: ${{ secrets.AZURE_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}

      - name: Login to Azure Container Registry
        uses: azure/docker-login@v1
        with:
          login-server: test1repo.azurecr.io
          username: ${{ secrets.ACR_USERNAME }}
          password: ${{ secrets.ACR_PASSWORD }}

      # Publishes the step output `cluster_exists` ("true" / "false").
      # Only a ResourceNotFound answer counts as "cluster absent"; every other
      # az failure (auth, network, throttling, ...) fails the step instead of
      # silently falling through to cluster creation.
      - id: check-aks
        name: Check if AKS Cluster exists
        uses: azure/CLI@v2
        with:
          azcliversion: latest
          inlineScript: |
            # Running az inside the `if` condition keeps a non-zero exit from
            # aborting the script should it be executed with `bash -e`, and lets
            # us read az's own status (the old `az | jq` pipeline reported jq's).
            if result=$(az aks show --resource-group my_test --name myAKSCluster --query id --output tsv 2>&1); then
              exit_code=0
            else
              exit_code=$?
            fi

            if [ "$exit_code" -eq 0 ] && [ -n "$result" ]; then
              echo "cluster_exists=true" >> "$GITHUB_OUTPUT"
            elif [ "$exit_code" -eq 0 ] || [[ "$result" == *"ResourceNotFound"* ]]; then
              echo "cluster_exists=false" >> "$GITHUB_OUTPUT"
            else
              echo "Error executing az aks show command. Exit code: $exit_code"
              echo "Error message: $result"
              echo "::error::Error executing az aks show command. Exit code: $exit_code"
              exit "$exit_code"
            fi

      # Skipped entirely (no CLI container start) when the cluster already exists.
      - name: Create AKS Cluster
        if: steps.check-aks.outputs.cluster_exists != 'true'
        uses: azure/CLI@v2
        with:
          azcliversion: latest
          inlineScript: |
            az aks create \
              --resource-group my_test \
              --name myAKSCluster \
              --node-count 1 \
              --node-vm-size Standard_NV6ads_A10_v5 \
              --enable-cluster-autoscaler \
              --min-count 1 \
              --max-count 1 \
              --max-pods 30 \
              --location centralindia \
              --kubernetes-version 1.28.5 \
              --generate-ssh-keys

      - name: Get AKS Credentials
        uses: azure/CLI@v2
        with:
          azcliversion: latest
          inlineScript: |
            # Select the subscription before any command that depends on it.
            az account set --subscription "${{ secrets.AZURE_SUBSCRIPTION_ID }}"

            if [ "${{ steps.check-aks.outputs.cluster_exists }}" != "true" ]; then
              # `--attach-acr` grants the cluster pull access on the registry.
              # The former `az role assignment create --role AcrPull` scoped to
              # the AKS resource id was dropped: a registry role assigned on a
              # cluster scope authorises nothing.
              az aks update --resource-group my_test --name myAKSCluster --attach-acr test1repo > /dev/null
            fi

            # NOTE(review): this kubeconfig is written inside the CLI action's
            # container; the kubectl steps further down presumably rely on
            # azure/aks-set-context instead - confirm this call is still needed.
            az aks get-credentials --resource-group my_test --name myAKSCluster --overwrite-existing

      # Despite the step names, the next two steps only build and push images;
      # the rollout happens in "Deploy APP to AKS".
      - name: Build and deploy nn_backend
        run: |
          image="test1repo.azurecr.io/nn_backend:${GITHUB_RUN_NUMBER}"
          docker build --tag "${image}" ./NN_backend
          docker push "${image}"

      - name: Build and deploy react_app
        run: |
          image="test1repo.azurecr.io/react_app:${GITHUB_RUN_NUMBER}"
          docker build --tag "${image}" ./react_app
          docker push "${image}"

      # Moved from v3 / v2.0 to the current v4 majors; inputs are unchanged.
      - uses: azure/setup-kubectl@v4

      - uses: azure/aks-set-context@v4
        with:
          resource-group: my_test
          cluster-name: myAKSCluster

      # Idempotent and unconditional: if an earlier run created the cluster but
      # failed before reaching this step, the plugin is still installed on the
      # next run, and an already existing namespace no longer fails the step.
      - name: Deploy NVIDIA DaemonSet to AKS
        working-directory: k8s
        run: |
          kubectl create namespace gpu-resources --dry-run=client -o yaml | kubectl apply -f -
          kubectl apply -f nvidia-device-plugin-ds.yaml

      # Rewrites the literal `$imageTag` placeholder in both Deployment
      # manifests to this run's image tag (GITHUB_RUN_NUMBER).
      - name: Replace placeholders in Kubernetes YAML files
        working-directory: k8s
        run: |
          for app in react_app nn_backend; do
            image="test1repo.azurecr.io/${app}"
            # `\$` keeps the dollar sign literal so sed matches the text "$imageTag".
            sed -i "s|image: ${image}:\$imageTag|image: ${image}:${GITHUB_RUN_NUMBER}|" "${app}_Deployment.yaml"
          done

      - name: Deploy APP to AKS
        working-directory: k8s
        run: |
          kubectl apply -f nn_backend_Service.yaml -f nn_backend_Deployment.yaml
          kubectl apply -f react_app_Deployment.yaml -f react_app_Service.yaml