diff --git a/manifests/modules/aiml/chatbot/gradio-mistral/gradio-ui.yaml b/manifests/modules/aiml/chatbot/gradio-mistral/gradio-ui.yaml
new file mode 100644
index 000000000..8cf572c20
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/gradio-mistral/gradio-ui.yaml
@@ -0,0 +1,130 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: gradio-mistral-tran1
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gradio-deployment
+  namespace: gradio-mistral-tran1
+  labels:
+    app: gradio
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: gradio
+  template:
+    metadata:
+      labels:
+        app: gradio
+    spec:
+      containers:
+        - name: gradio
+          image: public.ecr.aws/data-on-eks/gradio-web-app-base:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 7860
+          resources:
+            requests:
+              cpu: "512m"
+              memory: "2048Mi"
+            limits:
+              cpu: "1"
+              memory: "4096Mi"
+          env:
+            - name: MODEL_ENDPOINT
+              value: "/infer"
+            - name: SERVICE_NAME
+              value: "http://mistral-serve-svc.mistral.svc.cluster.local:8000"
+          volumeMounts:
+            - name: gradio-app-script
+              mountPath: /app/gradio-app.py
+              subPath: gradio-app-mistral-tran1.py
+      volumes:
+        - name: gradio-app-script
+          configMap:
+            name: gradio-app-script
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: gradio-service
+  namespace: gradio-mistral-tran1
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+spec:
+  selector:
+    app: gradio
+  ports:
+    - name: http
+      protocol: TCP
+      port: 80
+      targetPort: 7860
+  type: LoadBalancer
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: gradio-app-script
+  namespace: gradio-mistral-tran1
+data:
+  gradio-app-mistral-tran1.py: |
+    import gradio as gr
+    import requests
+    import os
+
+    # Constants for model endpoint and service name
+    model_endpoint = "/infer"
+    service_name = os.environ.get("SERVICE_NAME", "http://localhost:8000")
+
+    # Function to generate text
+    def text_generation(message, history):
+        prompt = message
+
+        # Create the URL for the inference request
+        url = f"{service_name}{model_endpoint}"
+
+        try:
+            # Send the request to the model service
+            response = requests.get(url, params={"sentence": prompt}, timeout=180)
+            response.raise_for_status()  # Raise an exception for HTTP errors
+
+            full_output = response.json()[0]
+            # Remove the original question from the output
+            answer_only = full_output.replace(prompt, "", 1).strip('["]?\n')
+
+            # Safety filter to remove harmful or inappropriate content
+            answer_only = filter_harmful_content(answer_only)
+            return answer_only
+        except requests.exceptions.RequestException as e:
+            # Handle any request exceptions (e.g., connection errors)
+            return f"AI: Error: {str(e)}"
+
+    # Define the safety filter function (you can implement this as needed)
+    def filter_harmful_content(text):
+        # TODO: Implement a safety filter to remove any harmful or inappropriate content from the text
+
+        # For now, simply return the text as-is
+        return text
+
+    # Define the Gradio ChatInterface
+    chat_interface = gr.ChatInterface(
+        text_generation,
+        chatbot=gr.Chatbot(line_breaks=True),
+        textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
+        title="neuron-mistral7bv0.3 AI Chat",
+        description="Ask me any question",
+        theme="soft",
+        examples=["How many languages are in India", "What is Generative AI?"],
+        cache_examples=False,
+        retry_btn=None,
+        undo_btn="Delete Previous",
+        clear_btn="Clear",
+    )
+
+    # Launch the ChatInterface
+    chat_interface.launch(server_name="0.0.0.0")
diff --git a/manifests/modules/aiml/chatbot/gradio-mistral/kustomization.yaml b/manifests/modules/aiml/chatbot/gradio-mistral/kustomization.yaml
new file mode 100644
index 000000000..1cca24122
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/gradio-mistral/kustomization.yaml
@@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - gradio-ui.yaml
diff --git a/manifests/modules/aiml/chatbot/nodepool/kustomization.yaml b/manifests/modules/aiml/chatbot/nodepool/kustomization.yaml
index b0f432bde..64ff67cc9 100644
--- a/manifests/modules/aiml/chatbot/nodepool/kustomization.yaml
+++ b/manifests/modules/aiml/chatbot/nodepool/kustomization.yaml
@@ -3,3 +3,4 @@ kind: Kustomization
 resources:
   - nodepool-inf2.yaml
   - nodepool-x86.yaml
+  - nodepool-tran1.yaml
diff --git a/manifests/modules/aiml/chatbot/nodepool/nodepool-tran1.yaml b/manifests/modules/aiml/chatbot/nodepool/nodepool-tran1.yaml
new file mode 100644
index 000000000..f3cb372d3
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/nodepool/nodepool-tran1.yaml
@@ -0,0 +1,67 @@
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: trainium-tran1
+spec:
+  template:
+    metadata:
+      labels:
+        instanceType: trainium
+        provisionerType: Karpenter
+        neuron.amazonaws.com/neuron-device: "true"
+    spec:
+      startupTaints:
+        - key: node.kubernetes.io/not-ready
+          effect: "NoExecute"
+      taints:
+        - key: aws.amazon.com/neuron
+          effect: "NoSchedule"
+      requirements:
+        - key: node.kubernetes.io/instance-type
+          operator: In
+          values: ["trn1.2xlarge"]
+        - key: "kubernetes.io/arch"
+          operator: In
+          values: ["amd64"]
+        - key: "karpenter.sh/capacity-type"
+          operator: In
+          values: ["on-demand", "spot"]
+      expireAfter: 720h
+      terminationGracePeriod: 24h
+      nodeClassRef:
+        group: karpenter.k8s.aws
+        kind: EC2NodeClass
+        name: trainium-tran1
+  limits:
+    cpu: 100
+    memory: 400Gi
+    aws.amazon.com/neuron: 10
+  disruption:
+    consolidateAfter: 300s
+    consolidationPolicy: WhenEmptyOrUnderutilized
+
+---
+apiVersion: karpenter.k8s.aws/v1
+kind: EC2NodeClass
+metadata:
+  name: trainium-tran1
+spec:
+  amiFamily: AL2
+  amiSelectorTerms:
+    - alias: al2@latest
+  blockDeviceMappings:
+    - deviceName: /dev/xvda
+      ebs:
+        deleteOnTermination: true
+        encrypted: true
+        volumeSize: 500Gi
+        volumeType: gp3
+  role: ${KARPENTER_NODE_ROLE}
+  securityGroupSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
+  subnetSelectorTerms:
+    - tags:
+        karpenter.sh/discovery: ${EKS_CLUSTER_NAME}
+  tags:
+    app.kubernetes.io/created-by: eks-workshop
diff --git a/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml b/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
index 41937cfaf..6c4027803 100644
--- a/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
+++ b/manifests/modules/aiml/chatbot/nodepool/nodepool-x86.yaml
@@ -11,6 +11,9 @@ spec:
         provisionerType: Karpenter
         workload: rayhead
     spec:
+      startupTaints:
+        - key: node.kubernetes.io/not-ready
+          effect: "NoExecute"
       requirements:
         - key: "karpenter.k8s.aws/instance-family"
           operator: In
diff --git a/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/Dockerfile b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/Dockerfile
new file mode 100644
index 000000000..c5e7276a2
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/Dockerfile
@@ -0,0 +1,46 @@
+# Base image: https://hub.docker.com/layers/rayproject/ray/2.11.0-py310/images/sha256-de798e487b76a8f2412c718c43c5f342b3eb05e0705a71325102904cd27c3613?context=explore
+FROM rayproject/ray:2.32.0-py310
+
+# Maintainer label
+LABEL maintainer="DoEKS"
+
+# Set environment variables to non-interactive (this prevents some prompts)
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Switch to root to add the Neuron repo and install necessary packages
+USER root
+
+# Set up the Neuron repository and install Neuron packages
+RUN . /etc/os-release && \
+    sudo echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/neuron.list && \
+    sudo wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - && \
+    sudo apt-get update -y && \
+    sudo apt-get install aws-neuronx-dkms aws-neuronx-collectives=2.* aws-neuronx-runtime-lib=2.* aws-neuronx-tools=2.* -y && \
+    sudo apt-get clean && \
+    sudo rm -rf /var/lib/apt/lists/*
+
+# Switch back to a non-root user for the subsequent commands
+USER $USER
+
+# Set pip repository pointing to the Neuron repository and install required Python packages
+RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com && \
+    pip install wget awscli regex neuronx-cc==2.* torch-neuronx torchvision transformers-neuronx sentencepiece transformers huggingface_hub tenacity psutil fastapi uvicorn mistral-inference mistral-common
+
+# Add Neuron path to PATH
+ENV PATH /opt/aws/neuron/bin:$PATH
+
+# Set LD_LIBRARY_PATH to include the directory with libpython3.10.so.1.0
+ENV LD_LIBRARY_PATH /home/ray/anaconda3/lib:$LD_LIBRARY_PATH
+
+# Create the serve application directory
+RUN mkdir -p /serve_app
+
+# Set working directory
+WORKDIR /serve_app
+
+COPY mistral1.py /serve_app/mistral1.py
+
diff --git a/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/kustomization.yaml b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/kustomization.yaml
new file mode 100644
index 000000000..1f5a41bc2
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/kustomization.yaml
@@ -0,0 +1,4 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - ray_service_mistral.yaml
diff --git a/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/mistral1.py b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/mistral1.py
new file mode 100644
index 000000000..ba8fcedd4
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/mistral1.py
@@ -0,0 +1,142 @@
+import os
+import json
+import logging
+from fastapi import FastAPI
+from ray import serve
+import torch
+import torch_neuronx
+from transformers import AutoTokenizer
+from transformers_neuronx.mistral.model import MistralForSampling
+from huggingface_hub import snapshot_download
+
+# Initialize FastAPI
+app = FastAPI()
+
+neuron_cores = int(os.getenv('NEURON_CORES', 2))  # Default to 2 for trn1.2xlarge
+cacheDir = os.path.join('/tmp', 'model', 'neuron-mistral7bv0.3')
+
+# --- Logging Setup ---
+logger = logging.getLogger("ray.serve")
+logger.setLevel(logging.INFO)
+logging.basicConfig(level=logging.INFO)
+
+@serve.deployment(num_replicas=1)
+@serve.ingress(app)
+class APIIngress:
+    def __init__(self, mistral_model_handle):
+        self.handle = mistral_model_handle
+
+    @app.get("/infer")
+    async def infer(self, sentence: str):
+        result = await self.handle.infer.remote(sentence)
+        return result
+
+@serve.deployment(
+    name="mistral-7b",
+    autoscaling_config={"min_replicas": 1, "max_replicas": 1},
+    ray_actor_options={
+        "resources": {"neuron_cores": neuron_cores},
+        "memory": 28000000000
+    }
+)
+class MistralModel:
+    def __init__(self):
+        try:
+            logger.info("Initializing model with pre-compiled files...")
+
+            mistral_model = os.getenv('MODEL_ID', 'askulkarni2/neuron-mistral7bv0.3')
+            logger.info(f"Using model ID: {mistral_model}")
+
+            model_path = '/tmp/model/neuron-mistral7bv0.3'
+            model_cache = '/tmp/model/cache'
+
+            # Initialize model state
+            self.neuron_model = None
+            self.tokenizer = None
+
+            # Download the model files to the local directory if they are not already present
+            if not os.path.exists(model_path):
+                os.makedirs(cacheDir, exist_ok=True)
+                os.makedirs(model_cache, exist_ok=True)
+                logger.info("Downloading model files to /tmp/model/neuron-mistral7bv0.3")
+                model_path = snapshot_download(repo_id=mistral_model, local_dir=cacheDir, local_dir_use_symlinks=False)
+                logger.info(f"model path: {model_path}")
+
+            logger.info(f"Checking model path contents: {os.listdir(model_path)}")
+
+            # Configure the Neuron runtime environment variables
+            os.environ.update({
+                "NEURON_RT_VISIBLE_CORES": "0,1",
+                "NEURON_RT_NUM_CORES": "2",
+                "NEURON_RT_USE_PREFETCHED_NEFF": "1",
+            })
+
+            logger.info("Loading tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                local_files_only=True
+            )
+
+            # Set padding token
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+                logger.info("Set padding token to EOS token")
+
+            logger.info("Loading model...")
+            # Load model with minimal configuration
+            self.neuron_model = MistralForSampling.from_pretrained(
+                model_path, batch_size=1, tp_degree=2, amp='bf16'
+            )
+
+            logger.info("Model preparation...")
+
+            neuronxcc_dirs = [d for d in os.listdir(model_cache)]
+            if not neuronxcc_dirs:
+                # Compile the model on first start and save the compiled artifacts in the cache dir
+                self.neuron_model.to_neuron()
+                self.neuron_model.save(model_cache)
+            else:
+                # Load pre-compiled .neff files from the cache dir
+                self.neuron_model.load(model_cache)
+                self.neuron_model.to_neuron()
+
+            logger.info("Model successfully prepared for inference")
+
+            # Verify initialization
+            if not self._verify_model_state():
+                raise RuntimeError("Model initialization failed verification")
+
+            logger.info("Model initialization complete")
+
+        except Exception as e:
+            logger.error(f"Error during model initialization: {e}")
+            raise
+
+    def _verify_model_state(self):
+        if self.neuron_model is None:
+            return False
+        if not hasattr(self.neuron_model, 'sample'):
+            return False
+        if self.tokenizer is None:
+            return False
+        return True
+
+    def infer(self, sentence: str):
+        input_ids = self.tokenizer.encode(sentence, return_tensors="pt")
+        with torch.inference_mode():
+            try:
+                logger.info(f"Performing inference on input: {sentence}")
+                generated_sequences = self.neuron_model.sample(
+                    input_ids, sequence_length=2048, top_k=50
+                )
+                decoded_sequences = [self.tokenizer.decode(seq, skip_special_tokens=True) for seq in generated_sequences]
+                logger.info(f"Inference result: {decoded_sequences}")
+                return decoded_sequences
+            except Exception as e:
+                logger.error(f"Error during inference: {e}")
+                return {"error": "Inference failed"}
+
+# Create an entry point for the FastAPI application
+entrypoint = APIIngress.bind(MistralModel.bind())
diff --git a/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/ray_service_mistral.yaml b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/ray_service_mistral.yaml
new file mode 100644
index 000000000..73b8f3bd3
--- /dev/null
+++ b/manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/ray_service_mistral.yaml
@@ -0,0 +1,194 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: mistral
+---
+#----------------------------------------------------------------------
+# NOTE: For deployment instructions, refer to the DoEKS website.
+#----------------------------------------------------------------------
+apiVersion: ray.io/v1
+kind: RayService
+metadata:
+  name: mistral
+  namespace: mistral
+spec:
+  serviceUnhealthySecondThreshold: 900
+  deploymentUnhealthySecondThreshold: 300
+  serveConfigV2: |
+    applications:
+      - name: mistral-deployment
+        import_path: "mistral1:entrypoint"
+        route_prefix: "/"
+        runtime_env:
+          env_vars:
+            MODEL_ID: "askulkarni2/neuron-mistral7bv0.3"
+            NEURON_CC_FLAGS: "-O1"
+            LD_LIBRARY_PATH: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
+            NEURON_CORES: "2"
+            NEURON_COMPILE_CACHE_URL: "/tmp/model/cache"
+            NEURON_RT_CACHE_DIRECTORY: "/tmp/model/cache"
+        deployments:
+          - name: mistral-7b
+            autoscaling_config:
+              min_replicas: 1
+              max_replicas: 1
+              target_num_ongoing_requests_per_replica: 1
+            ray_actor_options:
+              resources: {"neuron_cores": 2}
+              memory: 28000000000
+  rayClusterConfig:
+    rayVersion: '2.32.0'
+    enableInTreeAutoscaling: true
+    headGroupSpec:
+      serviceType: NodePort
+      headService:
+        metadata:
+          name: mistral
+      rayStartParams:
+        dashboard-host: '0.0.0.0'
+        num-cpus: "0" # this is to ensure no tasks or actors are scheduled on the head Pod
+      template:
+        spec:
+          containers:
+            - name: head
+              image: public.ecr.aws/e3e2e5u9/aiml/mistral-7b:latest
+              imagePullPolicy: Always # Ensure the image is always pulled when updated
+              lifecycle:
+                preStop:
+                  exec:
+                    command: ["/bin/sh", "-c", "ray stop"]
+              ports:
+                - containerPort: 6379
+                  name: gcs
+                - containerPort: 8265
+                  name: dashboard
+                - containerPort: 10001
+                  name: client
+                - containerPort: 8000
+                  name: serve
+              volumeMounts:
+                - mountPath: /tmp/ray
+                  name: ray-logs
+                - mountPath: /tmp/model/cache
+                  name: model-cache
+              resources:
+                limits:
+                  cpu: "4"
+                  memory: 16Gi
+                requests:
+                  cpu: "2"
+                  memory: 8Gi
+              env:
+                - name: PORT
+                  value: "8000"
+                - name: LD_LIBRARY_PATH
+                  value: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
+          nodeSelector:
+            instanceType: mixed-x86
+            provisionerType: Karpenter
+            workload: rayhead
+          volumes:
+            - name: ray-logs
+              emptyDir: {}
+            - name: model-cache
+              emptyDir: {}
+    workerGroupSpecs:
+      - groupName: worker-group
+        replicas: 1
+        minReplicas: 1
+        maxReplicas: 1
+        rayStartParams:
+          resources: '"{\"neuron_cores\": 2}"'
+          num-cpus: "6"
+        template:
+          spec:
+            containers:
+              - name: worker
+                image: public.ecr.aws/e3e2e5u9/aiml/mistral-7b:latest
+                imagePullPolicy: Always # Ensure the image is always pulled when updated
+                lifecycle:
+                  preStop:
+                    exec:
+                      command: ["/bin/sh", "-c", "ray stop"]
+                # Each request uses 2 Neuron cores, so a single worker serves one request at a time
+                resources:
+                  limits:
+                    memory: "30Gi"
+                    aws.amazon.com/neuron: "1"
+                  requests:
+                    memory: "28Gi"
+                    aws.amazon.com/neuron: "1"
+                env:
+                  # Model and Neuron configuration
+                  - name: MODEL_ID
+                    value: "askulkarni2/neuron-mistral7bv0.3"
+                  - name: NEURON_CORES
+                    value: "2"
+                  - name: NEURON_RT_NUM_CORES
+                    value: "2"
+                  - name: NEURON_RT_VISIBLE_CORES
+                    value: "0,1"
+                  - name: NEURON_CC_FLAGS
+                    value: "-O1" # Changed from --no-compile
+                  - name: NEURON_COMPILE_ONLY
+                    value: "0"
+                  - name: NEURON_RT_LOG_LEVEL
+                    value: "INFO"
+                  # Cache configuration
+                  - name: NEURON_COMPILE_CACHE_URL
+                    value: "/tmp/model/cache"
+                  - name: NEURON_RT_CACHE_DIRECTORY
+                    value: "/tmp/model/cache"
+                  - name: NEURON_RT_USE_PREFETCHED_NEFF
+                    value: "1" # Added to use pre-compiled NEFF files
+                  # Memory management
+                  - name: NEURON_RT_MAX_WORKSPACE_SIZE
+                    value: "8589934592"
+                  - name: XLA_TENSOR_ALLOCATOR_MAXSIZE
+                    value: "12884901888"
+                  - name: MALLOC_ARENA_MAX
+                    value: "32"
+                  - name: MALLOC_TRIM_THRESHOLD_
+                    value: "128K"
+                  - name: XLA_PYTHON_CLIENT_MEM_FRACTION
+                    value: "0.95"
+                  # Runtime configuration
+                  - name: NEURON_RT_STALL_ENABLE
+                    value: "1"
+                  - name: NEURON_RT_BLOCKING_IO
+                    value: "1"
+                  - name: NEURON_RT_EXEC_TIMEOUT
+                    value: "900"
+                  - name: RAY_memory_monitor_refresh_ms
+                    value: "5000"
+                  - name: RAY_memory_usage_threshold
+                    value: "0.90"
+                  # System paths
+                  - name: LD_LIBRARY_PATH
+                    value: "/home/ray/anaconda3/lib:$LD_LIBRARY_PATH"
+                  - name: PORT
+                    value: "8000"
+                volumeMounts:
+                  - mountPath: /tmp/ray
+                    name: ray-logs
+                  - mountPath: /dev/shm
+                    name: dshm
+                  - mountPath: /tmp/model/cache
+                    name: model-cache
+            volumes:
+              - name: dshm
+                emptyDir:
+                  medium: Memory
+              - name: ray-logs
+                emptyDir: {}
+              - name: model-cache
+                emptyDir: {}
+            nodeSelector:
+              instanceType: trainium
+              provisionerType: Karpenter
+              neuron.amazonaws.com/neuron-device: "true"
+            tolerations:
+              - key: "aws.amazon.com/neuron"
+                operator: "Exists"
+                effect: "NoSchedule"
+
diff --git a/website/docs/aiml/chatbot/add-mistral.md b/website/docs/aiml/chatbot/add-mistral.md
new file mode 100644
index 000000000..d55033619
--- /dev/null
+++ b/website/docs/aiml/chatbot/add-mistral.md
@@ -0,0 +1,86 @@
+---
+title: "Deploying The Mistral-7B-Instruct-v0.3 Chat Model on Ray Serve"
+sidebar_position: 60
+---
+
+With all the node pools provisioned, we can now proceed to deploy the Mistral-7B-Instruct-v0.3 chatbot infrastructure.
+
+Let's begin by deploying the `ray_service_mistral.yaml` file:
+
+```bash wait=5
+$ kubectl apply -k ~/environment/eks-workshop/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot
+namespace/mistral created
+rayservice.ray.io/mistral created
+```
+
+### Creating the Ray Service Pods for Inference
+
+The `ray_service_mistral.yaml` file defines the Kubernetes configuration for deploying the Ray Serve service for the Mistral-7B-Instruct-v0.3 chatbot:
+
+```file
+manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/ray_service_mistral.yaml
+```
+
+This configuration accomplishes the following:
+
+1. Creates a Kubernetes namespace named `mistral` for resource isolation
+2. Deploys a RayService named `rayservice.ray.io/mistral` that uses a Python script to create the Ray Serve components
+3. Provisions a head pod and worker pods that pull Docker images from Amazon Elastic Container Registry (ECR)
+
+After applying the configuration, we'll monitor the progress of the head and worker pods:
+
+```bash wait=5
+$ kubectl get pod -n mistral
+NAME                                                  READY   STATUS    RESTARTS   AGE
+mistral-raycluster-ltvjb-head-7rd7d                   0/2     Pending   0          4s
+mistral-raycluster-ltvjb-worker-worker-group-nff7x    0/1     Pending   0          4s
+```
+
+:::caution
+It may take up to 15 minutes for both pods to be ready.
+:::
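+
+While you wait, you can optionally watch Karpenter provision a Trainium node for the worker pod. The `trainium-tran1` node pool applies an `instanceType: trainium` label to the nodes it launches, so a label selector along the lines of the following illustrative command will show the new node once it registers (press `Ctrl+C` to stop watching):
+
+```bash
+$ kubectl get nodes -l instanceType=trainium --watch
+```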
+
+We can wait for the pods to be ready using the following command:
+
+```bash timeout=900
+$ kubectl wait pod \
+--all \
+--for=condition=Ready \
+--namespace=mistral \
+--timeout=15m
+pod/mistral-raycluster-ltvjb-head-7rd7d condition met
+pod/mistral-raycluster-ltvjb-worker-worker-group-nff7x condition met
+```
+
+Once the pods are fully deployed, we'll verify that everything is in place:
+
+```bash
+$ kubectl get all -n mistral
+NAME                                                      READY   STATUS    RESTARTS   AGE
+pod/mistral-raycluster-ltvjb-head-7rd7d                   2/2     Running   0          7m
+pod/mistral-raycluster-ltvjb-worker-worker-group-nff7x    1/1     Running   0          7m
+
+NAME                        TYPE       CLUSTER-IP      EXTERNAL-IP   PORT(S)                                                                        AGE
+service/mistral             NodePort   172.20.74.49    <none>        6379:32625/TCP,8265:30941/TCP,10001:32430/TCP,8000:31393/TCP,8080:31361/TCP   94m
+service/mistral-head-svc    NodePort   172.20.121.46   <none>        8000:30481/TCP,8080:32609/TCP,6379:31066/TCP,8265:31006/TCP,10001:30220/TCP   92m
+service/mistral-serve-svc   NodePort   172.20.241.50   <none>        8000:32351/TCP                                                                 92m
+
+NAME                                         DESIRED WORKERS   AVAILABLE WORKERS   CPUS   MEMORY   GPUS   STATUS   AGE
+raycluster.ray.io/mistral-raycluster-ltvjb   1                 1                   2      36Gi     0      ready    94m
+
+NAME                        SERVICE STATUS   NUM SERVE ENDPOINTS
+rayservice.ray.io/mistral   Running          2
+```
+
+:::caution
+Configuring the RayService may take up to 10 minutes.
+:::
+
+We can wait for the RayService to be running with this command:
+
+```bash wait=5 timeout=600
+$ kubectl wait --for=jsonpath='{.status.serviceStatus}'=Running rayservice/mistral -n mistral --timeout=10m
+rayservice.ray.io/mistral condition met
+```
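+
+At this point the model is reachable inside the cluster through the `mistral-serve-svc` service on port 8000, which exposes the `/infer` route defined in `mistral1.py`. As an optional sanity check (not a required lab step), you could port-forward the service from your workshop terminal and send a test prompt; the following is a sketch of what that request looks like:
+
+```bash
+$ kubectl port-forward -n mistral svc/mistral-serve-svc 8000:8000 &
+$ curl -G "http://localhost:8000/infer" --data-urlencode "sentence=What is Kubernetes?"
+```
+
+The response contains the generated text with the original prompt at the beginning; the Gradio application we deploy next sends this same request and strips the prompt from the output before displaying it.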
+
+With everything properly deployed, we can now proceed to create the web interface for the chatbot.
diff --git a/website/docs/aiml/chatbot/gradio-mistral.md b/website/docs/aiml/chatbot/gradio-mistral.md
new file mode 100644
index 000000000..61a7217ee
--- /dev/null
+++ b/website/docs/aiml/chatbot/gradio-mistral.md
@@ -0,0 +1,68 @@
+---
+title: "Configuring the Gradio Web User Interface for Access"
+sidebar_position: 70
+---
+
+After all the resources have been configured within the Ray Serve cluster, it's now time to directly access the Mistral-7B-Instruct-v0.3 chatbot. The web interface is powered by the Gradio UI.
+
+:::tip
+You can learn more about Load Balancers in the [Load Balancer module](../../../fundamentals/exposing/loadbalancer/index.md) provided in this workshop.
+:::
+
+### Deploying the Gradio Web User Interface
+
+Once the AWS Load Balancer Controller has been installed, we can deploy the Gradio UI components:
+
+```file
+manifests/modules/aiml/chatbot/gradio-mistral/gradio-ui.yaml
+```
+
+The components consist of a `Deployment`, `Service`, and `ConfigMap` to launch the application. In particular, the `Service` component is named `gradio-service` and is deployed as a `LoadBalancer`.
+
+```bash
+$ kubectl apply -k ~/environment/eks-workshop/modules/aiml/chatbot/gradio-mistral
+namespace/gradio-mistral-tran1 created
+configmap/gradio-app-script created
+service/gradio-service created
+deployment.apps/gradio-deployment created
+```
+
+To check the status of each component, run the following commands:
+
+```bash
+$ kubectl get deployments -n gradio-mistral-tran1
+NAME                READY   UP-TO-DATE   AVAILABLE   AGE
+gradio-deployment   1/1     1            1           95s
+```
+
+```bash
+$ kubectl get configmaps -n gradio-mistral-tran1
+NAME                DATA   AGE
+gradio-app-script   1      110s
+kube-root-ca.crt    1      111s
+```
+
+### Accessing the Chatbot Website
+
+Once the load balancer has finished deploying, use its external address to access the website:
+
+```bash wait=10
+$ kubectl get services -n gradio-mistral-tran1
+NAME             TYPE           CLUSTER-IP     EXTERNAL-IP                                                                      PORT(S)        AGE
+gradio-service   LoadBalancer   172.20.84.26   k8s-gradioll-gradiose-a6d0b586ce-06885d584b38b400.elb.us-west-2.amazonaws.com   80:30802/TCP   8m42s
+```
+
+To wait until the Network Load Balancer has finished provisioning, run the following command:
+
+```bash wait=240 timeout=600
+$ curl --head -X GET --retry 30 --retry-all-errors --retry-delay 15 --connect-timeout 5 --max-time 10 \
+-k $(kubectl get service -n gradio-mistral-tran1 gradio-service -o jsonpath="{.status.loadBalancer.ingress[*].hostname}{'\n'}")
+```
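+
+Once the load balancer responds, you can print the URL to open in your browser. This illustrative helper command reuses the same `jsonpath` expression as the check above:
+
+```bash
+$ echo "http://$(kubectl get service -n gradio-mistral-tran1 gradio-service -o jsonpath='{.status.loadBalancer.ingress[*].hostname}')"
+```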
+
+Now that our application is exposed to the outside world, let's access it by pasting the URL in your web browser. You will see the Mistral-7B-Instruct-v0.3 chatbot and will be able to interact with it by asking questions.
+
+
+This concludes the current lab on deploying the Mistral-7B-Instruct-v0.3 chatbot model within an EKS cluster via Karpenter.
diff --git a/website/docs/aiml/chatbot/mistral.md b/website/docs/aiml/chatbot/mistral.md
new file mode 100644
index 000000000..ccda8ed3d
--- /dev/null
+++ b/website/docs/aiml/chatbot/mistral.md
@@ -0,0 +1,31 @@
+---
+title: "Understanding the Mistral-7B-Instruct-v0.3 Chat Model"
+sidebar_position: 50
+sidebar_custom_props: { "module": true }
+weight: 30
+description: "Use AWS Trainium to accelerate deep learning inference workloads on Amazon Elastic Kubernetes Service."
+---
+
+The Mistral-7B-Instruct-v0.3 model represents a significant advancement in language model technology, combining powerful capabilities such as text generation and completion, information extraction, data analysis, API interaction, and complex reasoning with practical efficiency.
+
+As a 7B parameter model, it offers remarkable performance while remaining deployable on standard hardware configurations. It requires approximately 26-28 GB of memory: roughly 13-14 GB for the model weights (7 billion parameters at 2 bytes each in bf16) plus a comparable amount for the KV cache, input/output tensors, and runtime overhead. A `trn1.2xlarge` instance with 32 GB of memory is therefore suitable for running the Mistral-7B model, as it provides enough headroom for all of these.
+
+Mistral-7B-Instruct-v0.3 is implemented using FastAPI, Ray Serve, and PyTorch-based Hugging Face Transformers to create a seamless API for text generation.
+
+Here's the code for compiling the model that we'll use:
+
+```file
+manifests/modules/aiml/chatbot/ray-service-neuron-mistral-chatbot/mistral1.py
+```
+
+This Python code performs the following tasks:
+
+1. Configures an APIIngress class responsible for handling inference requests
+2. Defines a MistralModel class responsible for managing the Mistral language model
+3. Loads and compiles the model based on existing parameters
+4. Creates an entry point for the FastAPI application
+
+Through these steps, the Mistral-7B-Instruct-v0.3 chat model exposes an endpoint that accepts input sentences and generates text outputs. Its processing efficiency allows it to handle a wide variety of natural language processing applications, such as chatbots and text generation tasks.
+
+In this lab, we'll see how the Mistral-7B-Instruct-v0.3 model is configured as a RayService in Kubernetes, allowing users to understand how to deploy their own natural language processing applications.