Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

new: changes for deploying mistral model on trn1.2xlarge instance #1235

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions manifests/modules/aiml/chatbot/gradio-mistral/gradio-ui.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Dedicated namespace for the Gradio chatbot front end
apiVersion: v1
kind: Namespace
metadata:
  name: gradio-mistral-tran1
---
# Single-replica Gradio web UI; the app script is injected from a ConfigMap
apiVersion: apps/v1
kind: Deployment
metadata:
  name: gradio-deployment
  namespace: gradio-mistral-tran1
  labels:
    app: gradio
spec:
  replicas: 1
  selector:
    matchLabels:
      app: gradio
  template:
    metadata:
      labels:
        app: gradio
    spec:
      containers:
        - name: gradio
          image: public.ecr.aws/data-on-eks/gradio-web-app-base:latest
          imagePullPolicy: IfNotPresent
          ports:
            - containerPort: 7860
          resources:
            requests:
              cpu: "512m"
              memory: "2048Mi"
            limits:
              cpu: "1"
              memory: "4096Mi"
          env:
            # Route appended to SERVICE_NAME to build the inference URL
            - name: MODEL_ENDPOINT
              value: "/infer"
            # In-cluster DNS name of the Ray Serve service hosting the model
            - name: SERVICE_NAME
              value: "http://mistral-serve-svc.mistral.svc.cluster.local:8000"
          volumeMounts:
            # subPath mounts only the one script file at /app/gradio-app.py
            - name: gradio-app-script
              mountPath: /app/gradio-app.py
              subPath: gradio-app-mistral-tran1.py
      volumes:
        - name: gradio-app-script
          configMap:
            name: gradio-app-script
---
# Expose the Gradio UI on port 80 through an internet-facing NLB
# (annotations are consumed by the AWS Load Balancer Controller)
apiVersion: v1
kind: Service
metadata:
  name: gradio-service
  namespace: gradio-mistral-tran1
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: external
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    # Register pod IPs directly as NLB targets
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
spec:
  selector:
    app: gradio
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 7860
  type: LoadBalancer
---
# The Gradio chat application, mounted into the container via subPath
apiVersion: v1
kind: ConfigMap
metadata:
  name: gradio-app-script
  namespace: gradio-mistral-tran1
data:
  gradio-app-mistral-tran1.py: |
    import gradio as gr
    import requests
    import os

    # Inference route and base service URL (base is overridable via env)
    model_endpoint = "/infer"
    service_name = os.environ.get("SERVICE_NAME", "http://localhost:8000")

    def text_generation(message, history):
        # Chat callback: forward the user's message to the model service
        # and return the model's answer as plain text.
        request_url = f"{service_name}{model_endpoint}"
        try:
            resp = requests.get(request_url, params={"sentence": message}, timeout=180)
            resp.raise_for_status()  # surface HTTP errors as exceptions
            raw_text = resp.json()[0]
            # Drop the echoed question plus surrounding quote/bracket noise
            reply = raw_text.replace(message, "", 1).strip('["]?\n')
            # Run the (placeholder) safety filter before returning
            return filter_harmful_content(reply)
        except requests.exceptions.RequestException as err:
            # Connection errors, timeouts, and HTTP errors end up here
            return f"AI: Error: {str(err)}"

    def filter_harmful_content(text):
        # TODO: Implement a safety filter to remove any harmful or inappropriate content from the text
        # Currently a pass-through.
        return text

    # Chat UI definition
    chat_interface = gr.ChatInterface(
        text_generation,
        chatbot=gr.Chatbot(line_breaks=True),
        textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
        title="neuron-mistral7bv0.3 AI Chat",
        description="Ask me any question",
        theme="soft",
        examples=["How many languages are in India", "What is Generative AI?"],
        cache_examples=False,
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
    )

    # Bind to all interfaces so the pod's port is reachable
    chat_interface.launch(server_name="0.0.0.0")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Kustomize entrypoint for the Gradio UI manifests
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- gradio-ui.yaml
1 change: 1 addition & 0 deletions manifests/modules/aiml/chatbot/nodepool/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ kind: Kustomization
resources:
- nodepool-inf2.yaml
- nodepool-x86.yaml
- nodepool-tran1.yaml
67 changes: 67 additions & 0 deletions manifests/modules/aiml/chatbot/nodepool/nodepool-tran1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Karpenter NodePool provisioning trn1.2xlarge (Trainium) nodes
apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
  name: trainium-tran1
spec:
  template:
    metadata:
      labels:
        instanceType: trainium
        provisionerType: Karpenter
        neuron.amazonaws.com/neuron-device: "true"
    spec:
      # Keep pods off the node until it is fully ready
      startupTaints:
        - key: node.kubernetes.io/not-ready
          effect: "NoExecute"
      # Only workloads that tolerate the Neuron taint schedule here
      taints:
        - key: aws.amazon.com/neuron
          effect: "NoSchedule"
      requirements:
        - key: node.kubernetes.io/instance-type
          operator: In
          values: ["trn1.2xlarge"]
        - key: "kubernetes.io/arch"
          operator: In
          values: ["amd64"]
        - key: "karpenter.sh/capacity-type"
          operator: In
          values: ["on-demand", "spot"]
      # Rotate nodes after 30 days; allow up to 24h for graceful drain
      expireAfter: 720h
      terminationGracePeriod: 24h
      nodeClassRef:
        group: karpenter.k8s.aws
        kind: EC2NodeClass
        name: trainium-tran1
  # Aggregate capacity ceiling for the whole pool
  limits:
    cpu: 100
    memory: 400Gi
    aws.amazon.com/neuron: 10
  disruption:
    consolidateAfter: 300s
    consolidationPolicy: WhenEmptyOrUnderutilized

---
# EC2NodeClass backing the trainium-tran1 NodePool
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
  name: trainium-tran1
spec:
  amiFamily: AL2
  amiSelectorTerms:
    - alias: al2@latest
  blockDeviceMappings:
    # Large root volume to hold model artifacts and Neuron caches
    - deviceName: /dev/xvda
      ebs:
        deleteOnTermination: true
        encrypted: true
        volumeSize: 500Gi
        volumeType: gp3
  # Substituted at deploy time by the workshop tooling
  role: ${KARPENTER_NODE_ROLE}
  securityGroupSelectorTerms:
    - tags:
        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
  subnetSelectorTerms:
    - tags:
        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
  tags:
    app.kubernetes.io/created-by: eks-workshop
3 changes: 3 additions & 0 deletions manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ spec:
provisionerType: Karpenter
workload: rayhead
spec:
startupTaints:
- key: node.kubernetes.io/not-ready
effect: "NoExecute"
requirements:
- key: "karpenter.k8s.aws/instance-family"
operator: In
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# https://hub.docker.com/layers/rayproject/ray/2.11.0-py310/images/sha256-de798e487b76a8f2412c718c43c5f342b3eb05e0705a71325102904cd27c3613?context=explore
FROM rayproject/ray:2.32.0-py310

# Maintainer label
LABEL maintainer="DoEKS"

# Suppress interactive debconf prompts during apt operations.
# FIX: the value recognized by debconf is "noninteractive" (no hyphen);
# the previous "non-interactive" is silently ignored and prompts could
# still block the build.
ENV DEBIAN_FRONTEND=noninteractive

# Switch to root to add the Neuron apt repository and install system packages
USER root

# Set up the Neuron repository and install Neuron packages.
# We are already root here, so sudo is dropped; the original
# "sudo echo ... > file" was also misleading because the redirect runs in
# the calling shell, not under sudo.
RUN . /etc/os-release && \
    echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/neuron.list && \
    wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - && \
    apt-get update -y && \
    apt-get install aws-neuronx-dkms aws-neuronx-collectives=2.* aws-neuronx-runtime-lib=2.* aws-neuronx-tools=2.* -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Switch back to the image's default non-root user
# NOTE(review): assumes $USER is set by the rayproject base image -- confirm
USER $USER

# Point pip at the Neuron package index and install Python dependencies
RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com && \
    pip install wget awscli regex neuronx-cc==2.* torch-neuronx torchvision transformers-neuronx sentencepiece transformers huggingface_hub tenacity psutil fastapi uvicorn mistral-inference mistral-common

# Add Neuron tooling to PATH
ENV PATH /opt/aws/neuron/bin:$PATH

# Include the conda lib dir so libpython3.10.so.1.0 resolves at runtime
ENV LD_LIBRARY_PATH /home/ray/anaconda3/lib:$LD_LIBRARY_PATH

# Application directory for the Ray Serve app
RUN mkdir -p /serve_app
WORKDIR /serve_app

COPY mistral1.py /serve_app/mistral1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Kustomize entrypoint for the Ray Serve Mistral deployment
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ray_service_mistral.yaml
Loading