Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new: changes for deploying mistral model on trn1.2xlarge instance #1235

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions manifests/modules/aiml/chatbot/gradio-mistral/gradio-ui.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Dedicated namespace for the Gradio chatbot front end
apiVersion: v1
kind: Namespace
metadata:
  name: gradio-mistral-tran1
---
# Single-replica Gradio web UI; the app script is injected from a ConfigMap
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gradio-deployment
  namespace: gradio-mistral-tran1
  labels:
    app: gradio
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gradio
  template:
    metadata:
      labels:
        app: gradio
    spec:
      containers:
        - name: gradio
          image: public.ecr.aws/data-on-eks/gradio-web-app-base:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 7860
          resources:
            requests:
              cpu: "512m"
              memory: "2048Mi"
            limits:
              cpu: "1"
              memory: "4096Mi"
          env:
            # Route appended to SERVICE_NAME to build the inference URL
            - name: MODEL_ENDPOINT
              value: "/infer"
            # In-cluster DNS name of the Ray Serve service hosting the model
            - name: SERVICE_NAME
              value: "http://mistral-serve-svc.mistral.svc.cluster.local:8000"
          volumeMounts:
            # subPath mounts only the one script file at /app/gradio-app.py
            - name: gradio-app-script
              mountPath: /app/gradio-app.py
              subPath: gradio-app-mistral-tran1.py
      volumes:
        - name: gradio-app-script
          configMap:
            name: gradio-app-script
---
# Expose the Gradio UI on port 80 through an internet-facing NLB
# (annotations are consumed by the AWS Load Balancer Controller)
apiVersion: v1
kind: Service
metadata:
  name: gradio-service
  namespace: gradio-mistral-tran1
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    # Register pod IPs directly as NLB targets
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
spec:
  selector:
    app: gradio
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 7860
  type: LoadBalancer
---
# The Gradio chat application, mounted into the container via subPath
apiVersion: v1
kind: ConfigMap
metadata:
  name: gradio-app-script
  namespace: gradio-mistral-tran1
data:
  gradio-app-mistral-tran1.py: |
    import gradio as gr
    import requests
    import os

    # Inference route and base service URL (base is overridable via env)
    model_endpoint = "/infer"
    service_name = os.environ.get("SERVICE_NAME", "http://localhost:8000")

    def text_generation(message, history):
        # Chat callback: forward the user's message to the model service
        # and return the model's answer as plain text.
        request_url = f"{service_name}{model_endpoint}"
        try:
            resp = requests.get(request_url, params={"sentence": message}, timeout=180)
            resp.raise_for_status()  # surface HTTP errors as exceptions
            raw_text = resp.json()[0]
            # Drop the echoed question plus surrounding quote/bracket noise
            reply = raw_text.replace(message, "", 1).strip('["]?\n')
            # Run the (placeholder) safety filter before returning
            return filter_harmful_content(reply)
        except requests.exceptions.RequestException as err:
            # Connection errors, timeouts, and HTTP errors end up here
            return f"AI: Error: {str(err)}"

    def filter_harmful_content(text):
        # TODO: Implement a safety filter to remove any harmful or inappropriate content from the text
        # Currently a pass-through.
        return text

    # Chat UI definition
    chat_interface = gr.ChatInterface(
        text_generation,
        chatbot=gr.Chatbot(line_breaks=True),
        textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
        title="neuron-mistral7bv0.3 AI Chat",
        description="Ask me any question",
        theme="soft",
        examples=["How many languages are in India", "What is Generative AI?"],
        cache_examples=False,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )

    # Bind to all interfaces so the pod's port is reachable
    chat_interface.launch(server_name="0.0.0.0")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Kustomize entrypoint for the Gradio UI manifests
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gradio-ui.yaml
1 change: 1 addition & 0 deletions manifests/modules/aiml/chatbot/nodepool/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ kind: Kustomization
resources:
- nodepool-inf2.yaml
- nodepool-x86.yaml
- nodepool-tran1.yaml
67 changes: 67 additions & 0 deletions manifests/modules/aiml/chatbot/nodepool/nodepool-tran1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Karpenter NodePool provisioning trn1.2xlarge (Trainium) nodes
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: trainium-tran1
spec:
  template:
    metadata:
      labels:
        instanceType: trainium
        provisionerType: Karpenter
        neuron.amazonaws.com/neuron-device: "true"
    spec:
      # Keep pods off the node until it is fully ready
      startupTaints:
        - key: node.kubernetes.io/not-ready
          effect: "NoExecute"
      # Only workloads that tolerate the Neuron taint schedule here
      taints:
        - key: aws.amazon.com/neuron
          effect: "NoSchedule"
      requirements:
        - key: node.kubernetes.io/instance-type
          operator: In
          values: ["trn1.2xlarge"]
        - key: "kubernetes.io/arch"
          operator: In
          values: ["amd64"]
        - key: "karpenter.sh/capacity-type"
          operator: In
          values: ["on-demand", "spot"]
      # Rotate nodes after 30 days; allow up to 24h for graceful drain
      expireAfter: 720h
      terminationGracePeriod: 24h
      nodeClassRef:
        group: karpenter.k8s.aws
        kind: EC2NodeClass
        name: trainium-tran1
  # Aggregate capacity ceiling for the whole pool
  limits:
    cpu: 100
    memory: 400Gi
    aws.amazon.com/neuron: 10
  disruption:
    consolidateAfter: 300s
    consolidationPolicy: WhenEmptyOrUnderutilized

---
# EC2NodeClass backing the trainium-tran1 NodePool
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
  name: trainium-tran1
spec:
  amiFamily: AL2
  amiSelectorTerms:
    - alias: al2@latest
  blockDeviceMappings:
    # Large root volume to hold model artifacts and Neuron caches
    - deviceName: /dev/xvda
      ebs:
        deleteOnTermination: true
        encrypted: true
        volumeSize: 500Gi
        volumeType: gp3
  # Substituted at deploy time by the workshop tooling
  role: ${KARPENTER_NODE_ROLE}
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
  tags:
    app.kubernetes.io/created-by: eks-workshop
3 changes: 3 additions & 0 deletions manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ spec:
provisionerType: Karpenter
workload: rayhead
spec:
startupTaints:
- key: node.kubernetes.io/not-ready
effect: "NoExecute"
requirements:
- key: "karpenter.k8s.aws/instance-family"
operator: In
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# https://hub.docker.com/layers/rayproject/ray/2.11.0-py310/images/sha256-de798e487b76a8f2412c718c43c5f342b3eb05e0705a71325102904cd27c3613?context=explore
FROM rayproject/ray:2.32.0-py310

# Maintainer label
LABEL maintainer="DoEKS"

# Suppress interactive debconf prompts during apt operations.
# FIX: the value recognized by debconf is "noninteractive" (no hyphen);
# the previous "non-interactive" is silently ignored and prompts could
# still block the build.
ENV DEBIAN_FRONTEND=noninteractive

# Switch to root to add the Neuron apt repository and install system packages
USER root

# Set up the Neuron repository and install Neuron packages.
# We are already root here, so sudo is dropped; the original
# "sudo echo ... > file" was also misleading because the redirect runs in
# the calling shell, not under sudo.
RUN . /etc/os-release && \
    echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/neuron.list && \
    wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - && \
    apt-get update -y && \
    apt-get install aws-neuronx-dkms aws-neuronx-collectives=2.* aws-neuronx-runtime-lib=2.* aws-neuronx-tools=2.* -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Switch back to the image's default non-root user
# NOTE(review): assumes $USER is set by the rayproject base image -- confirm
USER $USER

# Point pip at the Neuron package index and install Python dependencies
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com && \
    pip install wget awscli regex neuronx-cc==2.* torch-neuronx torchvision transformers-neuronx sentencepiece transformers huggingface_hub tenacity psutil fastapi uvicorn mistral-inference mistral-common

# Add Neuron tooling to PATH
ENV PATH /opt/aws/neuron/bin:$PATH

# Include the conda lib dir so libpython3.10.so.1.0 resolves at runtime
ENV LD_LIBRARY_PATH /home/ray/anaconda3/lib:$LD_LIBRARY_PATH

# Application directory for the Ray Serve app
RUN mkdir -p /serve_app
WORKDIR /serve_app

COPY mistral1.py /serve_app/mistral1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Kustomize entrypoint for the Ray Serve Mistral deployment
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ray_service_mistral.yaml
Loading