-
Notifications
You must be signed in to change notification settings - Fork 16
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix #1066: Implement VoyageAI embedding client #1068
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
74f6be2
Fix #1066: Implement VoyageAI embedding client
tatu-at-datastax 87fb56c
Add plumbing for query/index (read/store) config choice
tatu-at-datastax 4d2914f
Merge branch 'main' into tatu/1066-embedding-voyage-ai
tatu-at-datastax d0cb6cf
Remove unnecessary exception declarations
tatu-at-datastax 529f34d
Bit more wiring
tatu-at-datastax c708a73
more wiring
tatu-at-datastax 7941d2a
Add support for autoTruncate
tatu-at-datastax 5156e79
Merge branch 'main' into tatu/1066-embedding-voyage-ai
tatu-at-datastax File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,12 +13,9 @@ | |
import jakarta.inject.Inject; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import org.slf4j.Logger; | ||
|
||
@ApplicationScoped | ||
public class EmbeddingProviderFactory { | ||
|
||
private static Logger logger = org.slf4j.LoggerFactory.getLogger(EmbeddingProviderFactory.class); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The logger was not being used, so it was removed. |
||
@Inject Instance<EmbeddingProviderConfigStore> embeddingProviderConfigStore; | ||
|
||
@Inject OperationsConfig config; | ||
|
@@ -41,6 +38,7 @@ EmbeddingProvider create( | |
Map.entry(ProviderConstants.AZURE_OPENAI, AzureOpenAIEmbeddingClient::new), | ||
Map.entry(ProviderConstants.HUGGINGFACE, HuggingFaceEmbeddingClient::new), | ||
Map.entry(ProviderConstants.VERTEXAI, VertexAIEmbeddingClient::new), | ||
Map.entry(ProviderConstants.VOYAGE_AI, VoyageAIEmbeddingClient::new), | ||
Map.entry(ProviderConstants.COHERE, CohereEmbeddingClient::new), | ||
Map.entry(ProviderConstants.NVIDIA, NvidiaEmbeddingClient::new)); | ||
|
||
|
@@ -49,18 +47,18 @@ public EmbeddingProvider getConfiguration( | |
String serviceName, | ||
String modelName, | ||
int dimension, | ||
Map<String, Object> vectorizeServiceParameter, | ||
Map<String, Object> vectorizeServiceParameters, | ||
String commandName) { | ||
return addService( | ||
tenant, serviceName, modelName, dimension, vectorizeServiceParameter, commandName); | ||
tenant, serviceName, modelName, dimension, vectorizeServiceParameters, commandName); | ||
} | ||
|
||
private synchronized EmbeddingProvider addService( | ||
Optional<String> tenant, | ||
String serviceName, | ||
String modelName, | ||
int dimension, | ||
Map<String, Object> vectorizeServiceParameter, | ||
Map<String, Object> vectorizeServiceParameters, | ||
String commandName) { | ||
final EmbeddingProviderConfigStore.ServiceConfig configuration = | ||
embeddingProviderConfigStore.get().getConfiguration(tenant, serviceName); | ||
|
@@ -73,7 +71,7 @@ private synchronized EmbeddingProvider addService( | |
configuration.baseUrl(), | ||
modelName, | ||
embeddingService, | ||
vectorizeServiceParameter, | ||
vectorizeServiceParameters, | ||
commandName); | ||
} | ||
|
||
|
@@ -105,6 +103,6 @@ private synchronized EmbeddingProvider addService( | |
configuration.baseUrl(), | ||
modelName, | ||
dimension, | ||
vectorizeServiceParameter); | ||
vectorizeServiceParameters); | ||
} | ||
} |
119 changes: 119 additions & 0 deletions
119
...in/java/io/stargate/sgv2/jsonapi/service/embedding/operation/VoyageAIEmbeddingClient.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package io.stargate.sgv2.jsonapi.service.embedding.operation; | ||
|
||
import com.fasterxml.jackson.annotation.JsonIgnoreProperties; | ||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
import io.quarkus.rest.client.reactive.ClientExceptionMapper; | ||
import io.quarkus.rest.client.reactive.QuarkusRestClientBuilder; | ||
import io.smallrye.mutiny.Uni; | ||
import io.stargate.sgv2.jsonapi.exception.ErrorCode; | ||
import io.stargate.sgv2.jsonapi.exception.JsonApiException; | ||
import io.stargate.sgv2.jsonapi.service.embedding.configuration.EmbeddingProviderConfigStore; | ||
import io.stargate.sgv2.jsonapi.service.embedding.configuration.EmbeddingProviderResponseValidation; | ||
import io.stargate.sgv2.jsonapi.service.embedding.operation.error.HttpResponseErrorMessageMapper; | ||
import jakarta.ws.rs.HeaderParam; | ||
import jakarta.ws.rs.POST; | ||
import jakarta.ws.rs.core.Response; | ||
import java.net.URI; | ||
import java.time.Duration; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.concurrent.TimeUnit; | ||
import org.eclipse.microprofile.rest.client.annotation.ClientHeaderParam; | ||
import org.eclipse.microprofile.rest.client.annotation.RegisterProvider; | ||
import org.eclipse.microprofile.rest.client.inject.RegisterRestClient; | ||
|
||
public class VoyageAIEmbeddingClient implements EmbeddingProvider { | ||
private EmbeddingProviderConfigStore.RequestProperties requestProperties; | ||
private String modelName; | ||
private final VoyageAIEmbeddingProvider embeddingProvider; | ||
|
||
private final String requestTypeQuery, requestTypeIndex; | ||
private final Boolean autoTruncate; | ||
|
||
public VoyageAIEmbeddingClient( | ||
EmbeddingProviderConfigStore.RequestProperties requestProperties, | ||
String baseUrl, | ||
String modelName, | ||
int dimension, | ||
Map<String, Object> vectorizeServiceParameters) { | ||
this.requestProperties = requestProperties; | ||
this.modelName = modelName; | ||
// use configured input_type if available | ||
requestTypeQuery = requestProperties.requestTypeQuery().orElse(null); | ||
requestTypeIndex = requestProperties.requestTypeIndex().orElse(null); | ||
Object v = vectorizeServiceParameters.get("autoTruncate"); | ||
autoTruncate = (v instanceof Boolean) ? (Boolean) v : null; | ||
|
||
embeddingProvider = | ||
QuarkusRestClientBuilder.newBuilder() | ||
.baseUri(URI.create(baseUrl)) | ||
.readTimeout(requestProperties.timeoutInMillis(), TimeUnit.MILLISECONDS) | ||
.build(VoyageAIEmbeddingProvider.class); | ||
} | ||
|
||
@RegisterRestClient | ||
@RegisterProvider(EmbeddingProviderResponseValidation.class) | ||
public interface VoyageAIEmbeddingProvider { | ||
@POST | ||
// no path specified, as it is already included in the baseUri | ||
@ClientHeaderParam(name = "Content-Type", value = "application/json") | ||
Uni<EmbeddingResponse> embed( | ||
@HeaderParam("Authorization") String accessToken, EmbeddingRequest request); | ||
|
||
@ClientExceptionMapper | ||
static RuntimeException mapException(Response response) { | ||
return HttpResponseErrorMessageMapper.getDefaultException(response); | ||
} | ||
} | ||
|
||
record EmbeddingRequest( | ||
@JsonInclude(JsonInclude.Include.NON_EMPTY) String input_type, | ||
String[] input, | ||
String model, | ||
@JsonInclude(JsonInclude.Include.NON_NULL) Boolean truncation) {} | ||
|
||
@JsonIgnoreProperties({"object"}) | ||
record EmbeddingResponse(Data[] data, String model, Usage usage) { | ||
@JsonIgnoreProperties({"object"}) | ||
record Data(int index, float[] embedding) {} | ||
|
||
record Usage(int total_tokens) {} | ||
} | ||
|
||
@Override | ||
public Uni<List<float[]>> vectorize( | ||
List<String> texts, | ||
Optional<String> apiKeyOverride, | ||
EmbeddingRequestType embeddingRequestType) { | ||
final String inputType = | ||
(embeddingRequestType == EmbeddingRequestType.SEARCH) ? requestTypeQuery : requestTypeIndex; | ||
String[] textArray = new String[texts.size()]; | ||
EmbeddingRequest request = | ||
new EmbeddingRequest(inputType, texts.toArray(textArray), modelName, autoTruncate); | ||
Uni<EmbeddingResponse> response = | ||
embeddingProvider | ||
.embed("Bearer " + apiKeyOverride.get(), request) | ||
.onFailure( | ||
throwable -> { | ||
return (throwable.getCause() != null | ||
&& throwable.getCause() instanceof JsonApiException jae | ||
&& jae.getErrorCode() == ErrorCode.EMBEDDING_PROVIDER_TIMEOUT); | ||
}) | ||
.retry() | ||
.withBackOff(Duration.ofMillis(requestProperties.retryDelayInMillis())) | ||
.atMost(requestProperties.maxRetries()); | ||
return response | ||
.onItem() | ||
.transform( | ||
resp -> { | ||
if (resp.data() == null) { | ||
return Collections.emptyList(); | ||
} | ||
Arrays.sort(resp.data(), (a, b) -> a.index() - b.index()); | ||
return Arrays.stream(resp.data()).map(data -> data.embedding()).toList(); | ||
}); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Needed to map 2 recently added request-properties to pass to embedding providers.