[compute-ai-embeddings] Rework support for Hugging Face local models …

…for embeddings (LangStream#584)
vectorize-io · Oct 13, 2023 · c2da53b · c2da53b
1 parent 4d1d76c
commit c2da53b
Show file tree

Hide file tree

Showing 12 changed files with 268 additions and 72 deletions.
diff --git a/examples/applications/compute-hugging-face-embeddings/README.md b/examples/applications/compute-hugging-face-embeddings/README.md
@@ -0,0 +1,35 @@
+# Computing text embeddings with Hugging Face - local execution
+
+This sample application shows how to use Hugging Face to compute text embeddings without calling the API.
+
+## Configuring the model
+
+In order to use an open source model, you need to configure the model in the pipeline.yaml file.
+You can do it using environment variables or by editing the file directly.
+
+```
+export HUGGING_FACE_PROVIDER=local
+export HUGGING_FACE_EMBEDDINGS_MODEL=multilingual-e5-small
+export HUGGING_FACE_EMBEDDINGS_MODEL_URL=djl://ai.djl.huggingface.pytorch/intfloat/multilingual-e5-small
+```
+
+If you want to use the API instead, set HUGGING_FACE_PROVIDER to "api" and configure your API access key.
+```
+export HUGGING_FACE_PROVIDER=api
+export HUGGING_FACE_ACCESS_KEY=your_access_key
+export HUGGING_FACE_EMBEDDINGS_MODEL=multilingual-e5-small
+```
+
+## Deploy the LangStream application
+
+```
+./bin/langstream apps deploy test -app examples/applications/compute-hugging-face-embeddings -i examples/instances/kafka-kubernetes.yaml -s examples/secrets/secrets.yaml
+```
+
+## Send text and read the computed embeddings using the CLI
+Since the application opens a gateway, we can use the gateway API to send and consume messages.
+
+```
+./bin/langstream gateway chat test -cg consume-output -pg produce-input -p sessionId=$(uuidgen)
+```
+
diff --git a/examples/applications/compute-hugging-face-embeddings/configuration.yaml b/examples/applications/compute-hugging-face-embeddings/configuration.yaml
@@ -0,0 +1,24 @@
+#
+#
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+configuration:
+  resources:
+    - type: "hugging-face-configuration"
+      name: "Hugging Face AI configuration"
+      configuration:
+        access-key: "${ secrets.hugging-face.access-key }"
+        provider:  "${ secrets.hugging-face.provider }"
diff --git a/examples/applications/compute-hugging-face-embeddings/gateways.yaml b/examples/applications/compute-hugging-face-embeddings/gateways.yaml
@@ -0,0 +1,38 @@
+#
+#
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+gateways:
+  - id: produce-input
+    type: produce
+    topic: input-topic
+    parameters:
+      - sessionId
+    produce-options:
+      headers:
+        - key: langstream-client-session-id
+          value-from-parameters: sessionId
+
+  - id: consume-output
+    type: consume
+    topic: output-topic
+    parameters:
+      - sessionId
+    consume-options:
+      filters:
+        headers:
+          - key: langstream-client-session-id
+            value-from-parameters: sessionId
diff --git a/examples/applications/compute-hugging-face-embeddings/pipeline.yaml b/examples/applications/compute-hugging-face-embeddings/pipeline.yaml
@@ -0,0 +1,42 @@
+#
+# Copyright DataStax, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+topics:
+  - name: "input-topic"
+    creation-mode: create-if-not-exists
+  - name: "output-topic"
+    creation-mode: create-if-not-exists
+errors:
+    on-failure: "skip"
+pipeline:
+  - name: "convert-to-structure"
+    type: "document-to-json"
+    input: "input-topic"
+    configuration:
+      text-field: "question"
+  - name: "compute-embeddings"
+    type: "compute-ai-embeddings"
+    output: "output-topic"
+    configuration:
+      model: "${secrets.hugging-face.embeddings-model}" # This is the id of the model
+      model-url: "${secrets.hugging-face.embeddings-model-url}" # This is the URL of the repository containing the model
+      embeddings-field: "value.embeddings"
+      text: "{{ value.question }}"
+      batch-size: 10
+      # flush-interval is expressed in milliseconds. Take this value into account when using this agent in a chat response pipeline,
+      # because it directly impacts the latency of the response.
+      # For latency-sensitive applications, consider setting batch-size to 1 or flush-interval to 0.
+      flush-interval: 500
diff --git a/examples/secrets/secrets.yaml b/examples/secrets/secrets.yaml
@@ -44,6 +44,9 @@ secrets:
   - id: hugging-face
     data:
       access-key: ${HUGGING_FACE_ACCESS_KEY:-}
+      provider: ${HUGGING_FACE_PROVIDER:-api}
+      embeddings-model: ${HUGGING_FACE_EMBEDDINGS_MODEL:-multilingual-e5-small}
+      embeddings-model-url: ${HUGGING_FACE_EMBEDDINGS_MODEL_URL:-djl://ai.djl.huggingface.pytorch/intfloat/multilingual-e5-small}
   - id: astra
     data:
       clientId: ${ASTRA_CLIENT_ID:-}

diff --git a/langstream-agents/langstream-ai-agents/pom.xml b/langstream-agents/langstream-ai-agents/pom.xml
@@ -173,7 +173,8 @@
       <groupId>ai.djl.pytorch</groupId>
       <artifactId>pytorch-native-cpu</artifactId>
       <!--classifier>osx-aarch64</classifier-->
-      <classifier>osx-x86_64</classifier>
+      <!--<classifier>osx-x86_64</classifier>-->
+      <classifier>linux-x86_64</classifier>
     </dependency>
     <dependency>
       <groupId>ai.djl.pytorch</groupId>

diff --git a/...am-ai-agents/src/main/java/ai/langstream/ai/agents/services/impl/HuggingFaceProvider.java b/...am-ai-agents/src/main/java/ai/langstream/ai/agents/services/impl/HuggingFaceProvider.java
@@ -80,11 +80,11 @@ public CompletionsService getCompletionsService(
         public EmbeddingsService getEmbeddingsService(Map<String, Object> additionalConfiguration)
                 throws Exception {
             String provider =
-                    additionalConfiguration
+                    providerConfiguration
                             .getOrDefault("provider", ComputeProvider.API.name())
                             .toString()
                             .toUpperCase();
-            String modelUrl = (String) additionalConfiguration.get("modelUrl");
+            String modelUrl = (String) additionalConfiguration.get("model-url");
             String model = (String) additionalConfiguration.get("model");
             Map<String, String> options = (Map) additionalConfiguration.get("options");
             Map<String, String> arguments = (Map) additionalConfiguration.get("arguments");
@@ -98,7 +98,7 @@ public EmbeddingsService getEmbeddingsService(Map<String, Object> additionalConf
                     if (model != null && !model.isEmpty()) {
                         builder.modelName(model);
                         if (modelUrl == null || modelUrl.isEmpty()) {
-                            modelUrl = "djl://ai.djl.huggingface.pytorch" + model;
+                            modelUrl = "djl://ai.djl.huggingface.pytorch/" + model;
                             log.info("Automatically computed model URL {}", modelUrl);
                         }
                     }

diff --git a/langstream-agents/langstream-ai-agents/src/test/resources/ConGen-BERT-Mini.zip b/langstream-agents/langstream-ai-agents/src/test/resources/ConGen-BERT-Mini.zip
diff --git a/...re/src/main/java/ai/langstream/impl/agents/ai/steps/ComputeAIEmbeddingsConfiguration.java b/...re/src/main/java/ai/langstream/impl/agents/ai/steps/ComputeAIEmbeddingsConfiguration.java
@@ -59,12 +59,6 @@ public void generateSteps(
                                     aiServiceConfigurationGenerator);
                     aiServiceConfigurationGenerator.generateAIServiceConfiguration(
                             (String) step.remove("ai-service"));
-
-                    // in the user config we use the pascal but the downstream impl requires snake
-                    final Object modelUrl = step.remove("modelUrl");
-                    if (modelUrl != null) {
-                        step.put("model-url", modelUrl);
-                    }
                 }
             };
 
@@ -153,7 +147,8 @@ public void generateSteps(
     @ConfigProperty(
             description =
                     """
-                            URL of the model to use. (HuggingFace only). The default is computed from the model: "djl://ai.djl.huggingface.pytorch{model}"
+                            URL of the model to use. (HuggingFace only). The default is computed from the model: "djl://ai.djl.huggingface.pytorch/{model}"
                              """)
+    @JsonProperty("model-url")
     private String modelUrl;
 }
diff --git a/...i/langstream/runtime/impl/k8s/agents/KubernetesGenAIToolKitFunctionAgentProviderTest.java b/...i/langstream/runtime/impl/k8s/agents/KubernetesGenAIToolKitFunctionAgentProviderTest.java
@@ -472,8 +472,8 @@ public void testStepsDoc() {
                              "type" : "string",
                              "defaultValue" : "text-embedding-ada-002"
                            },
-                           "modelUrl" : {
-                             "description" : "URL of the model to use. (HuggingFace only). The default is computed from the model: \\"djl://ai.djl.huggingface.pytorch{model}\\"",
+                           "model-url" : {
+                             "description" : "URL of the model to use. (HuggingFace only). The default is computed from the model: \\"djl://ai.djl.huggingface.pytorch/{model}\\"",
                              "required" : false,
                              "type" : "string"
                            },