-
Notifications
You must be signed in to change notification settings - Fork 1
/
cf_kubeflow_single_instance.yaml
290 lines (281 loc) · 9.05 KB
/
cf_kubeflow_single_instance.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# CloudFormation template format version. Quoted so that YAML loaders keep it
# a string instead of resolving the plain scalar to a date.
AWSTemplateFormatVersion: "2010-09-09"
# Region -> AMI lookup consumed by the launch template through
# !FindInMap [RegionMap, <current region>, AMI].
#
# Only us-east-1 carries a real image id. Every other entry is the placeholder
# "ami-xxxxxxxxxxxxxxxxx" and must be replaced before deploying to that region.
# NOTE(review): the launch template maps the root volume to /dev/sda1 and the
# user data assumes an "ubuntu" user, so the AMIs are presumably Ubuntu images
# -- confirm when filling in the placeholders.
Mappings:
  RegionMap:
    eu-central-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    us-east-1:
      AMI: ami-0b93ce03dcbcb10f6
    us-gov-east-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    us-gov-west-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    us-east-2:
      AMI: ami-xxxxxxxxxxxxxxxxx
    us-west-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    us-west-2:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ca-central-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    eu-west-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    eu-west-2:
      AMI: ami-xxxxxxxxxxxxxxxxx
    eu-west-3:
      AMI: ami-xxxxxxxxxxxxxxxxx
    eu-north-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    eu-south-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ap-east-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ap-southeast-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ap-southeast-2:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ap-south-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ap-northeast-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
    ap-northeast-2:
      AMI: ami-xxxxxxxxxxxxxxxxx
    me-south-1:
      AMI: ami-xxxxxxxxxxxxxxxxx
# Stack description shown in the CloudFormation console (literal string only).
Description: >-
  Single-instance Charmed Kubeflow and MLflow on MicroK8s: creates a VPC with
  one public subnet and a one-instance Auto Scaling group whose EC2 user data
  installs and configures the stack.
# Stack inputs. Version-like defaults and allowed values are quoted so that
# YAML does not resolve them to floats (for example 1.20 would become 1.2).
Parameters:
  # Used as the launch template name, the Auto Scaling group name, and as a
  # prefix/suffix for the IAM role, instance profile and service-linked role.
  # NOTE(review): the pattern allows "(", ")" and "/", which the resources
  # derived from this name may reject -- confirm before using such values.
  AppName:
    Description: Name of the application the cluster will serve
    Type: String
    MinLength: "3"
    AllowedPattern: '[a-zA-Z0-9\(\)\.\-/_]+'
    Default: "kubeflow-single-instance"

  KeyPair:
    Description: Amazon EC2 Key Pair used to ssh to the cluster nodes
    Type: "AWS::EC2::KeyPair::KeyName"

  InstanceTypeParameter:
    Type: String
    Default: t2.2xlarge
    AllowedValues:
      - t2.2xlarge
    Description: Select the instance type of the cluster nodes

  InstanceVolumeSize:
    Type: Number
    Default: 100
    MinValue: 100
    Description: Size of the (unencrypted DeleteOnTermination) gp2 volume attached to the instance

  # Substituted into the MicroK8s snap channel as "<version>/stable".
  KubernetesVersion:
    Type: String
    Default: "1.22"
    AllowedValues:
      - "1.23"
      - "1.22"
      - "1.21"
    Description: The Kubernetes version

  # Passed to "juju deploy kubeflow --channel=<value>".
  KubeflowVersion:
    Type: String
    Default: 1.6/stable
    AllowedValues:
      - 1.6/stable
      - latest/stable
      - latest/edge
    Description: The Kubeflow version to be deployed

  # Configured as the dex-auth static username by the instance user data.
  KubeflowDashboardUsername:
    Type: String
    MinLength: "5"
    AllowedPattern: '[a-zA-Z0-9\(\)\.\-/_@]+'
    Default: "user123@email.com"
    Description: Username for the Kubeflow dashboard login (default is user123@email.com)

  # Configured as the dex-auth static password by the instance user data.
  # SECURITY NOTE: NoEcho only masks the value in CloudFormation output; the
  # template still substitutes it in clear text into the launch template user
  # data, and a well-known default is supplied. The default is kept for
  # backward compatibility -- always override it for real deployments.
  KubeflowDashboardPassword:
    Type: String
    NoEcho: "true"
    MinLength: "5"
    AllowedPattern: '[a-zA-Z0-9\(\)\.\-/_@]+'
    Default: "user123"
    Description: Default is user123

  VpcCIDR:
    Description: Please enter the IP range (CIDR notation) for this VPC or use Default
    Type: String
    Default: 10.192.0.0/16

  PublicSubnet1CIDR:
    Description: Please enter the IP range (CIDR notation) for the public subnet (random Availability Zone will be assigned)
    Type: String
    Default: 10.192.10.0/24
# Everything the stack creates: networking (VPC, subnet, gateway, routes),
# IAM (instance role/profile, Auto Scaling service-linked role), and the
# launch template + one-instance Auto Scaling group that boots Kubeflow.
Resources:
  # Security group attached to the instance. No ingress or egress rules are
  # declared here.
  # NOTE(review): with no ingress rule anywhere in this template, SSH with the
  # KeyPair parameter is not reachable as written -- confirm the intended
  # access path.
  SecurityGroupEgress:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupName: Kubeflow allow all egress rule
      GroupDescription: "Kubeflow instance security group without ingress rules"
      VpcId: !Ref VPC

  # Launch template for the single Kubeflow node.
  EC2LaunchTemplate:
    Type: "AWS::EC2::LaunchTemplate"
    DependsOn: SecurityGroupEgress
    Properties:
      LaunchTemplateName: !Ref AppName
      LaunchTemplateData:
        # Boot script, run through !Sub so the KubernetesVersion,
        # KubeflowVersion and dashboard credential parameters are substituted.
        # Steps: install juju/charmcraft/microk8s snaps, enable the MicroK8s
        # dns, storage and metallb add-ons, bootstrap a juju controller,
        # deploy Kubeflow, configure dex-auth/oidc-gatekeeper, then deploy
        # MLflow with a MariaDB charm and relate it to the Kubeflow charms.
        # The fixed sleeps (120 s and 720 s) give the add-ons and the Kubeflow
        # charms time to settle; the script does not verify that they did.
        # The dashboard credentials are double-quoted inside the su command so
        # characters allowed by the parameter pattern, such as "(" and ")",
        # are not interpreted by the inner shell.
        UserData:
          Fn::Base64: !Sub |
            #!/bin/bash
            apt update
            apt -y upgrade
            for snap in juju juju-wait charmcraft; do sudo snap install $snap --classic; done
            snap install microk8s --classic --channel=${KubernetesVersion}/stable
            sudo snap refresh charmcraft --channel latest/candidate
            usermod -a -G microk8s ubuntu
            mkdir -p /home/ubuntu/.kube
            chown -f -R ubuntu /home/ubuntu/.kube
            microk8s enable dns storage metallb:"10.64.140.43-10.64.140.49,192.168.0.105-192.168.0.111"
            sleep 120
            microk8s.kubectl wait --for=condition=available -nkube-system deployment/coredns deployment/hostpath-provisioner
            microk8s.kubectl -n kube-system rollout status ds/calico-node
            su ubuntu -c 'juju bootstrap microk8s uk8s-controller'
            su ubuntu -c 'juju add-model kubeflow'
            su ubuntu -c 'juju deploy kubeflow --channel=${KubeflowVersion} --trust'
            su ubuntu -c 'juju config dex-auth public-url=http://10.64.140.43.nip.io'
            su ubuntu -c 'juju config oidc-gatekeeper public-url=http://10.64.140.43.nip.io'
            su ubuntu -c 'juju config dex-auth static-username="${KubeflowDashboardUsername}"'
            su ubuntu -c 'juju config dex-auth static-password="${KubeflowDashboardPassword}"'
            sleep 720
            echo "Charmed Kubeflow deployed"
            su ubuntu -c 'juju run --unit istio-pilot/0 -- "export JUJU_DISPATCH_PATH=hooks/config-changed; ./dispatch"'
            su ubuntu -c 'juju deploy mlflow-server'
            su ubuntu -c 'juju deploy charmed-osm-mariadb-k8s mlflow-db'
            su ubuntu -c 'juju relate minio mlflow-server'
            su ubuntu -c 'juju relate istio-pilot mlflow-server'
            su ubuntu -c 'juju relate mlflow-db mlflow-server'
            su ubuntu -c 'juju relate mlflow-server admission-webhook'
            echo "Charmed MlFlow deployed"
        # Root volume: unencrypted gp2, sized by InstanceVolumeSize, deleted
        # together with the instance.
        BlockDeviceMappings:
          - DeviceName: "/dev/sda1"
            Ebs:
              Encrypted: false
              DeleteOnTermination: true
              VolumeSize: !Ref InstanceVolumeSize
              VolumeType: "gp2"
        IamInstanceProfile:
          Arn: !GetAtt InstanceProfile.Arn
        EbsOptimized: false
        KeyName: !Ref KeyPair
        DisableApiTermination: false
        # Region-specific image from the RegionMap mapping.
        ImageId: !FindInMap
          - RegionMap
          - !Ref AWS::Region
          - AMI
        InstanceType: !Ref InstanceTypeParameter
        SecurityGroupIds:
          - !Ref SecurityGroupEgress

  # Instance role assumable by EC2; its inline policy allows creating tags on
  # instances and describing tags.
  EC2Role:
    Type: "AWS::IAM::Role"
    Properties:
      Description: !Sub "${AppName} Role"
      RoleName: !Sub "${AppName}-EC2Role"
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - ec2.amazonaws.com
            Action:
              - "sts:AssumeRole"
      Policies:
        - PolicyName: root
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Action: "ec2:CreateTags"
                Effect: Allow
                Resource: "arn:*:ec2:*:*:instance/*"
              - Action: "ec2:DescribeTags"
                Effect: Allow
                Resource: "*"

  # Auto Scaling group fixed at exactly one instance (Min = Max = Desired = 1).
  AutoScalingAutoScalingGroup:
    Type: "AWS::AutoScaling::AutoScalingGroup"
    # AutoScalingRole must exist because its ARN is rebuilt by string below
    # rather than referenced. PublicRoute1 is required because the instance
    # boots as soon as the group is created and its user data immediately
    # downloads packages, which needs the default route to the internet
    # gateway.
    DependsOn:
      - EC2LaunchTemplate
      - AutoScalingRole
      - PublicSubnet
      - PublicSubnetRouteTableAssociation
      - PublicRoute1
      - SecurityGroupEgress
    Properties:
      AutoScalingGroupName: !Ref AppName
      LaunchTemplate:
        LaunchTemplateId: !Ref EC2LaunchTemplate
        # Track the newest template version so that launch template changes
        # made by a stack update actually reach the group (a hard-coded
        # version 1 would keep launching the original configuration).
        Version: !GetAtt EC2LaunchTemplate.LatestVersionNumber
      MinSize: 1
      MaxSize: 1
      DesiredCapacity: 1
      Cooldown: 300
      VPCZoneIdentifier:
        - !Ref PublicSubnet
      HealthCheckType: "EC2"
      HealthCheckGracePeriod: 300
      TerminationPolicies:
        - "NewestInstance"
      # AWS::Partition keeps the ARN valid outside the "aws" partition (the
      # RegionMap lists us-gov-* regions).
      ServiceLinkedRoleARN: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/aws-service-role/autoscaling.amazonaws.com/AWSServiceRoleForAutoScaling_${AppName}"
      NewInstancesProtectedFromScaleIn: false

  # Simple scaling policy that adds one instance when triggered.
  # NOTE(review): nothing in this template triggers it and the group is capped
  # at MaxSize 1 -- confirm whether it is still needed.
  AutoScalingScalingPolicy:
    Type: "AWS::AutoScaling::ScalingPolicy"
    Properties:
      # !Ref yields the group name and creates the dependency implicitly.
      AutoScalingGroupName: !Ref AutoScalingAutoScalingGroup
      PolicyType: "SimpleScaling"
      AdjustmentType: "ChangeInCapacity"
      ScalingAdjustment: 1

  # Service-linked role for Auto Scaling, suffixed with the application name.
  AutoScalingRole:
    Type: "AWS::IAM::ServiceLinkedRole"
    Properties:
      AWSServiceName: autoscaling.amazonaws.com
      Description: AutoScaling ServiceLinked Role
      CustomSuffix: !Ref AppName

  # Instance profile wrapping EC2Role; referenced by the launch template.
  InstanceProfile:
    Type: AWS::IAM::InstanceProfile
    DependsOn: "EC2Role"
    Properties:
      InstanceProfileName: !Sub "${AppName}-InstanceProfile"
      Roles:
        - !Ref EC2Role

  # Dedicated VPC for the deployment.
  VPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: !Ref VpcCIDR
      EnableDnsSupport: true
      EnableDnsHostnames: true
      Tags:
        - Key: Name
          Value: !Join ['', [!Ref "AWS::StackName", "-VPC"]]

  InternetGateway:
    Type: AWS::EC2::InternetGateway
    DependsOn: VPC

  # Attaches the internet gateway to the VPC.
  AttachGateway:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref VPC
      InternetGatewayId: !Ref InternetGateway

  # Single public subnet in the first Availability Zone returned for the
  # region; instances receive a public IP on launch.
  PublicSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      VpcId: !Ref VPC
      CidrBlock: !Ref PublicSubnet1CIDR
      AvailabilityZone: !Select [0, !GetAZs '']
      MapPublicIpOnLaunch: true
      Tags:
        - Key: Name
          Value: !Sub ${AWS::StackName}-Public-A

  PublicRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref VPC
      Tags:
        - Key: Name
          Value: Public

  # Default route to the internet gateway; waits for the gateway attachment.
  PublicRoute1:
    Type: AWS::EC2::Route
    DependsOn: AttachGateway
    Properties:
      RouteTableId: !Ref PublicRouteTable
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref InternetGateway

  PublicSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      SubnetId: !Ref PublicSubnet
      RouteTableId: !Ref PublicRouteTable