simple-genai-server 0.2: Make autonomous mode effective #3693

Merged: 2 commits, Mar 11, 2024
2 changes: 1 addition & 1 deletion examples/simple-genai-server/Makefile
@@ -26,7 +26,7 @@ REPOSITORY ?= us-docker.pkg.dev/agones-images/examples

mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
project_path := $(dir $(mkfile_path))
server_tag := $(REPOSITORY)/simple-genai-game-server:0.1
server_tag := $(REPOSITORY)/simple-genai-game-server:0.2
server_tag_linux_amd64 = $(server_tag)-linux-amd64
push_server_manifest = $(server_tag_linux_amd64)
root_path = $(realpath $(project_path)/../..)
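For reference, the tag variables this hunk composes expand as shown below. This is a sketch in Go rather than Make, with `REPOSITORY` at its default value; the function name is illustrative, not part of the example's code.

```go
package main

import "fmt"

// composeTags mirrors the Makefile's tag composition. The repository
// argument corresponds to REPOSITORY, which is overridable (hence `?=`
// in the Makefile).
func composeTags(repository string) (serverTag, serverTagLinuxAmd64 string) {
	serverTag = repository + "/simple-genai-game-server:0.2"
	serverTagLinuxAmd64 = serverTag + "-linux-amd64"
	return serverTag, serverTagLinuxAmd64
}

func main() {
	// With the default REPOSITORY, push_server_manifest resolves to:
	_, pushTag := composeTags("us-docker.pkg.dev/agones-images/examples")
	fmt.Println(pushTag)
	// us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.2-linux-amd64
}
```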
5 changes: 4 additions & 1 deletion examples/simple-genai-server/README.md
@@ -115,7 +115,10 @@ If you set up the `gameserver_autochat.yaml` the chat will be in the game server
kubectl logs -f gen-ai-server-auto -c simple-genai-game-server
```

In autochat mode the game server will shutdown automatically once the chat is complete.
In autochat mode, the game server stays running until the game server is deleted.
While running, it keeps `--ConcurrentPlayers` player slots active: each simulated player
initiates a chat and continues until it sends `--StopPhrase` or reaches `--NumChats` chats,
whichever comes first, after which a new player fills the slot.
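The slot-refill behavior described above can be sketched as follows. This is illustrative only: `runChat`, the goroutine-per-slot structure, and all names here are assumptions, not the actual simple-genai-game-server implementation.

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// runChat simulates one player's session: the player chats until it
// sends the stop phrase or reaches numChats exchanges, whichever comes
// first. It returns how many chats the session used.
func runChat(numChats int, stopPhrase string, say func(turn int) string) int {
	for turn := 0; turn < numChats; turn++ {
		if strings.Contains(say(turn), stopPhrase) {
			return turn + 1 // stop phrase sent: end the session early
		}
	}
	return numChats
}

func main() {
	const concurrentPlayers = 2 // --ConcurrentPlayers
	var wg sync.WaitGroup
	for slot := 0; slot < concurrentPlayers; slot++ {
		wg.Add(1)
		go func(slot int) {
			defer wg.Done()
			// The real server loops until the GameServer is deleted; a few
			// sessions per slot are enough to show the refill behavior.
			for session := 0; session < 3; session++ {
				used := runChat(20, "Bye!", func(turn int) string {
					if turn == 4 {
						return "Bye!" // player says goodbye on its 5th message
					}
					return fmt.Sprintf("slot %d message %d", slot, turn)
				})
				fmt.Printf("slot %d: session %d ended after %d chats\n", slot, session, used)
			}
		}(slot)
	}
	wg.Wait()
}
```

Each goroutine stands in for one player slot: when a session ends (by stop phrase or chat cap), the loop immediately starts the next simulated player in that slot.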

If you set up the `gameserver_manualchat.yaml` you can manually send requests to the GenAI endpoint.
Retrieve the IP address and port:
9 changes: 1 addition & 8 deletions examples/simple-genai-server/gameserver_autochat.yaml
@@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: simple-genai-game-server
image: us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.1
image: us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.2
# imagePullPolicy: Always # add for development
env:
- name: GEN_AI_ENDPOINT
@@ -51,10 +51,3 @@
limits:
memory: 64Mi
cpu: 20m
# Schedule onto the game server node pool when running in the same cluster as the inference server.
# tolerations:
# - key: "agones.dev/role"
# value: "gameserver"
# effect: "NoExecute"
# nodeSelector:
# agones.dev/role: gameserver
9 changes: 1 addition & 8 deletions examples/simple-genai-server/gameserver_manualchat.yaml
@@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: simple-genai-game-server
image: us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.1
image: us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.2
# imagePullPolicy: Always # add for development
env:
- name: GEN_AI_ENDPOINT
@@ -44,10 +44,3 @@
limits:
memory: 64Mi
cpu: 20m
# Schedule onto the game server node pool when running in the same cluster as the inference server.
# tolerations:
# - key: "agones.dev/role"
# value: "gameserver"
# effect: "NoExecute"
# nodeSelector:
# agones.dev/role: gameserver
75 changes: 49 additions & 26 deletions examples/simple-genai-server/gameserver_npcchat.yaml
@@ -17,50 +17,73 @@ kind: GameServer
metadata:
name: gen-ai-server-npc
spec:
ports:
- name: default
portPolicy: Dynamic
containerPort: 7654
protocol: TCP
template:
spec:
containers:
- name: simple-genai-game-server
image: us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.1
image: us-docker.pkg.dev/agones-images/examples/simple-genai-game-server:0.2
# imagePullPolicy: Always # add for development
env:
- name: GEN_AI_ENDPOINT
# Use the service endpoint address when running in the same cluster as the inference server.
# TODO (igooch): Change this to the `/genai/npc-chat` endpoint when it's properly plumbed in the inference server
value: "http://npc-chat-api.genai.svc.cluster.local:80"
# Use the service endpoint address when running in the same cluster as the inference server.
value: "http://genai-api.genai.svc/genai/npc_chat"
# To bypass the /genai routing layer:
# value: "http://npc-chat-api.genai.svc"
# GenAiContext is not passed to the npc-chat-api endpoint.
- name: GEN_AI_NPC # False by default. Use GEN_AI_NPC "true" when using the npc-chat-api as the GEN_AI_ENDPOINT.
- name: GEN_AI_NPC # False by default. Use GEN_AI_NPC "true" when using the npc-chat-api as the GEN_AI_ENDPOINT.
value: "true"
- name: FROM_ID # Default is "2".
value: "2"
- name: TO_ID # Default is "1".
- name: TO_ID # Default is "1".
value: "1"
- name: SIM_ENDPOINT
value: "http://192.1.1.2/genai/chat"
- name: SIM_CONTEXT
value: "Ask questions about one of the following: What happened here? Where were you during the earthquake? Do you have supplies?"
value: "http://genai-api.genai.svc/genai/chat"
# To bypass the /genai routing layer:
# value: "http://vertex-chat-api.genai.svc"
- name: SIM_NPC
value: "false" # False by default. Use SIM_NPC "true" when using the npc-chat-api as the SIM_ENDPOINT.
value: "false" # False by default. Use SIM_NPC "true" when using the npc-chat-api as the SIM_ENDPOINT.
- name: CONCURRENT_PLAYERS
value: "1"
- name: PROMPT
value: "Hello"
# In the autonomous case, prompt is the first message sent by the Sim. We need this to
# prime the pump, since neither chat LLM is expecting to start.
value: "Hey, do you have a sec?"
- name: SIM_CONTEXT
# Context for simulated player. This prompt was tested against Vertex chat-bison - if you switch
# to a different model for the Sim, you will likely need to adjust the prompt.
value: |
Please act like a person talking to another person named Joseph.
You are not a language model. You are not a virtual assistant.

Cymbal City, where you live, just had a major earthquake. You just approached a man named Joseph,
who you are familiar with but don't know well. You survived the earthquake, but you don't remember
your name. You are in shock, but you're curious what happened to Cymbal City during the earthquake,
you're curious who you are, and you're curious where you can find a hot dog.

Joseph looks like an aid worker, ask him questions.
Your questions should be short, you are in a hurry.
Ask one question about something you have not yet asked about.

Do not offer to help.
Do not try to be helpful, you are busy.

When you have all the information you need, say Bye!
- name: STOP_PHRASE
# If the Sim says something containing this string, allow the NPC to reply and then stop.
# This should match a phrase in the SIM_CONTEXT.
value: "Bye!"
- name: NUM_CHATS
value: "50"
value: "20"
resources:
requests:
memory: 64Mi
cpu: 20m
limits:
memory: 64Mi
cpu: 20m
# Schedule onto the game server node pool when running in the same cluster as the inference server.
# tolerations:
# - key: "agones.dev/role"
# value: "gameserver"
# effect: "NoExecute"
# nodeSelector:
# agones.dev/role: gameserver
# Schedule anywhere without a GPU
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: cloud.google.com/gke-accelerator
operator: DoesNotExist