diff --git a/eslint.config.js b/eslint.config.js index a53711076..308afaea7 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -8,7 +8,7 @@ export default [ rules: { 'import/namespace': 'off'} }, { - ignores: ['**/dist/**/*', '**/coverage/**/*', 'packages/ai-core/src/client/**/*'], + ignores: ['**/dist*/**/*', '**/coverage/**/*', 'packages/ai-core/src/client/**/*'], }, { files: ['**/test-util/**/*.ts', '**/packages/gen-ai-hub/src/orchestration/client/**/*'], diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index b17b8c1cb..032bf574f 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -1,6 +1,6 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { - Destination, + HttpDestination, Service, ServiceCredentials, getServiceBinding, @@ -18,7 +18,7 @@ let aiCoreServiceBinding: Service | undefined; * Returns a destination object from AI Core service binding. * @returns The destination object. */ -export async function getAiCoreDestination(): Promise<Destination> { +export async function getAiCoreDestination(): Promise<HttpDestination> { if (!aiCoreServiceBinding) { aiCoreServiceBinding = getAiCoreServiceKeyFromEnv() || getServiceBinding('aicore'); @@ -29,12 +29,12 @@ export async function getAiCoreDestination(): Promise<Destination> { } } - const aiCoreDestination = await transformServiceBindingToDestination( + const aiCoreDestination = (await transformServiceBindingToDestination( aiCoreServiceBinding, { useCache: true } - ); + )) as HttpDestination; return aiCoreDestination; } diff --git a/packages/core/src/http-client.ts b/packages/core/src/http-client.ts index 871d64f82..3b53e80da 100644 --- a/packages/core/src/http-client.ts +++ b/packages/core/src/http-client.ts @@ -54,19 +54,17 @@ export interface EndpointOptions { * @param requestConfig - The request configuration. * @returns The {@link HttpResponse} from the AI Core service. */ -export async function executeRequest<Data extends BaseLlmParameters>( +export async function executeRequest( endpointOptions: EndpointOptions, - data: Data, + data: any, requestConfig?: CustomRequestConfig ): Promise<HttpResponse> { const aiCoreDestination = await getAiCoreDestination(); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const { deploymentConfiguration, ...body } = data; const { url, apiVersion } = endpointOptions; const mergedRequestConfig = { ...mergeWithDefaultRequestConfig(apiVersion, requestConfig), - data: JSON.stringify(body) + data: JSON.stringify(data) }; const targetUrl = aiCoreDestination.url + `/v2/${removeLeadingSlashes(url)}`; diff --git a/packages/core/src/openapi-request-builder.ts b/packages/core/src/openapi-request-builder.ts index b6be5ee11..980ef526d 100644 --- a/packages/core/src/openapi-request-builder.ts +++ b/packages/core/src/openapi-request-builder.ts @@ -27,13 +27,9 @@ export class OpenApiRequestBuilder< async executeRaw(): Promise<HttpResponse> { const { url, data, ...rest } = await this.requestConfig(); // TODO: Remove explicit url! once we updated the type in the Cloud SDK, since url is always defined. - return executeRequest( - { url: url! }, - { deploymentConfiguration: {}, ...data }, - { - ...rest - } - ); + return executeRequest({ url: url!
}, data, { + ...rest + }); } /** diff --git a/packages/gen-ai-hub/package.json b/packages/gen-ai-hub/package.json index 3e61f4027..8af98626c 100644 --- a/packages/gen-ai-hub/package.json +++ b/packages/gen-ai-hub/package.json @@ -29,6 +29,7 @@ }, "dependencies": { "@sap-ai-sdk/core": "workspace:^", + "@sap-ai-sdk/ai-core": "workspace:^", "@sap-cloud-sdk/http-client": "^3.18.1", "@sap-cloud-sdk/connectivity": "^3.18.1", "@sap-cloud-sdk/util": "^3.18.1", diff --git a/packages/gen-ai-hub/src/client/interface.ts b/packages/gen-ai-hub/src/client/interface.ts deleted file mode 100644 index d67e82de6..000000000 --- a/packages/gen-ai-hub/src/client/interface.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { BaseLlmParameters, CustomRequestConfig } from '@sap-ai-sdk/core'; -import { BaseLlmOutput } from './types.js'; - -/** - * The base client interface for all provider specific clients. - */ -export interface BaseClient<T extends BaseLlmParameters> { - /** - * Creates a completion for the chat messages. - * @param data - The input parameters for the chat completion. - * @param requestConfig - The request configuration. - * @returns The completion result. - */ - chatCompletion( - data: T, - requestConfig?: CustomRequestConfig - ): Promise<BaseLlmOutput>; - /** - * Creates an embedding vector representing the given text. - * @param data - The input parameters for the chat completion. - * @param requestConfig - The request configuration. - * @returns The completion result. - */ - embeddings( - data: T, - requestConfig?: CustomRequestConfig - ): Promise<BaseLlmOutput>; -} diff --git a/packages/gen-ai-hub/src/client/openai/openai-client.test.ts b/packages/gen-ai-hub/src/client/openai/openai-client.test.ts index 35f712a5f..e11053fa2 100644 --- a/packages/gen-ai-hub/src/client/openai/openai-client.test.ts +++ b/packages/gen-ai-hub/src/client/openai/openai-client.test.ts @@ -1,5 +1,4 @@ import nock from 'nock'; -import { BaseLlmParametersWithDeploymentId } from '@sap-ai-sdk/core'; import { mockClientCredentialsGrantCall, mockInference, @@ -7,7 +6,6 @@ import { } from '../../../../../test-util/mock-http.js'; import { OpenAiChatCompletionOutput, - OpenAiChatCompletionParameters, OpenAiChatMessage, OpenAiEmbeddingOutput, OpenAiEmbeddingParameters } from './openai-types.js'; import { OpenAiClient } from './openai-client.js'; describe('openai client', () => { - const deploymentConfiguration: BaseLlmParametersWithDeploymentId = { - deploymentId: 'deployment-id' - }; const chatCompletionEndpoint = { - url: `inference/deployments/${deploymentConfiguration.deploymentId}/chat/completions`, + url: 'inference/deployments/1234/chat/completions', apiVersion: '2024-02-01' }; const embeddingsEndpoint = { - url: `inference/deployments/${deploymentConfiguration.deploymentId}/embeddings`, + url: 'inference/deployments/1234/embeddings', apiVersion: '2024-02-01' }; @@ -47,10 +42,7 @@ describe('openai client', () => { } ] as OpenAiChatMessage[] }; - const request: OpenAiChatCompletionParameters = { - ...prompt, - deploymentConfiguration - }; + const mockResponse = parseMockResponse( 'openai', 'openai-chat-completion-success-response.json' ); mockInference( { - data: request + data: prompt }, { data: mockResponse, status: 200 }, chatCompletionEndpoint ); - const response = await client.chatCompletion(request); + const response = await client.chatCompletion( + 'gpt-35-turbo', + prompt, + '1234' + ); expect(response).toEqual(mockResponse); }); it('throws on bad request', async () => { const prompt = {
messages: [] }; - const request: OpenAiChatCompletionParameters = { - ...prompt, - deploymentConfiguration - }; const mockResponse = parseMockResponse( 'openai', 'openai-error-response.json' @@ -84,7 +76,7 @@ describe('openai client', () => { mockInference( { - data: request + data: prompt }, { data: mockResponse, @@ -93,17 +85,17 @@ describe('openai client', () => { chatCompletionEndpoint ); - expect(client.chatCompletion(request)).rejects.toThrow(); + await expect( + client.chatCompletion('gpt-4', prompt, '1234') + ).rejects.toThrow('status code 400'); }); }); describe('embeddings', () => { it('parses a successful response', async () => { - const prompt = { input: ['AI is fascinating'] }; - const request: OpenAiEmbeddingParameters = { - ...prompt, - deploymentConfiguration - }; + const prompt = { + input: ['AI is fascinating'] + } as OpenAiEmbeddingParameters; const mockResponse = parseMockResponse( 'openai', 'openai-embeddings-success-response.json' @@ -111,7 +103,7 @@ describe('openai client', () => { mockInference( { - data: request + data: prompt }, { data: mockResponse, @@ -119,16 +111,16 @@ describe('openai client', () => { }, embeddingsEndpoint ); - const response = await client.embeddings(request); + const response = await client.embeddings( + 'text-embedding-ada-002', + prompt, + '1234' + ); expect(response).toEqual(mockResponse); }); it('throws on bad request', async () => { const prompt = { input: [] }; - const request: OpenAiEmbeddingParameters = { - ...prompt, - deploymentConfiguration - }; const mockResponse = parseMockResponse( 'openai', 'openai-error-response.json' @@ -136,7 +128,7 @@ describe('openai client', () => { mockInference( { - data: request + data: prompt }, { data: mockResponse, @@ -145,7 +137,9 @@ describe('openai client', () => { embeddingsEndpoint ); - expect(client.embeddings(request)).rejects.toThrow(); + await expect( + client.embeddings('text-embedding-3-large', prompt, '1234') + ).rejects.toThrow('status code 400'); }); }); }); diff --git a/packages/gen-ai-hub/src/client/openai/openai-client.ts b/packages/gen-ai-hub/src/client/openai/openai-client.ts index 99b0e6a8d..443c8ad4f 100644 --- a/packages/gen-ai-hub/src/client/openai/openai-client.ts +++ b/packages/gen-ai-hub/src/client/openai/openai-client.ts @@ -1,60 +1,107 @@ +import { HttpRequestConfig } from '@sap-cloud-sdk/http-client'; +import { CustomRequestConfig, executeRequest } from '@sap-ai-sdk/core'; import { - BaseLlmParameters, - CustomRequestConfig, - executeRequest -} from '@sap-ai-sdk/core'; -import { BaseClient } from '../interface.js'; + DeploymentResolver, + resolveDeployment +} from '../../utils/deployment-resolver.js'; import { OpenAiChatCompletionParameters, OpenAiEmbeddingParameters, OpenAiEmbeddingOutput, - OpenAiChatCompletionOutput + OpenAiChatCompletionOutput, + OpenAiChatModel, + OpenAiEmbeddingModel } from './openai-types.js'; const apiVersion = '2024-02-01'; /** - * OpenAI GPT Client. + * OpenAI Client. */ -export class OpenAiClient implements BaseClient { +export class OpenAiClient { /** * Creates a completion for the chat messages. + * @param model - The model to use for the chat completion. * @param data - The input parameters for the chat completion. + * @param deploymentResolver - A deployment id or a function to retrieve it. * @param requestConfig - The request configuration. * @returns The completion result. 
*/ async chatCompletion( + model: OpenAiChatModel | { name: OpenAiChatModel; version: string }, data: OpenAiChatCompletionParameters, + deploymentResolver?: DeploymentResolver, requestConfig?: CustomRequestConfig ): Promise<OpenAiChatCompletionOutput> { + const deploymentId = await resolveOpenAiDeployment( + model, + deploymentResolver + ); const response = await executeRequest( { - url: `/inference/deployments/${data.deploymentConfiguration.deploymentId}/chat/completions`, + url: `/inference/deployments/${deploymentId}/chat/completions`, apiVersion }, data, - requestConfig + mergeRequestConfig(requestConfig) ); return response.data; } /** * Creates an embedding vector representing the given text. - * @param data - The input parameters for the chat completion. + * @param model - The model to use for the embedding computation. + * @param data - The text to embed. + * @param deploymentResolver - A deployment id or a function to retrieve it. * @param requestConfig - The request configuration. * @returns The completion result. */ async embeddings( + model: + | OpenAiEmbeddingModel + | { name: OpenAiEmbeddingModel; version: string }, data: OpenAiEmbeddingParameters, + deploymentResolver?: DeploymentResolver, requestConfig?: CustomRequestConfig ): Promise<OpenAiEmbeddingOutput> { + const deploymentId = await resolveOpenAiDeployment( + model, + deploymentResolver + ); const response = await executeRequest( - { - url: `/inference/deployments/${data.deploymentConfiguration.deploymentId}/embeddings`, - apiVersion - }, + { url: `/inference/deployments/${deploymentId}/embeddings`, apiVersion }, data, - requestConfig + mergeRequestConfig(requestConfig) ); return response.data; } } + +async function resolveOpenAiDeployment( + model: string | { name: string; version: string }, + resolver?: DeploymentResolver +) { + if (typeof resolver === 'string') { + return resolver; + } + const llm = + typeof model === 'string' ? { name: model, version: 'latest' } : model; + const deployment = await resolveDeployment({ + scenarioId: 'foundation-models', + executableId: 'azure-openai', + model: llm + }); + return deployment.id; +} + +function mergeRequestConfig( + requestConfig?: CustomRequestConfig +): HttpRequestConfig { + return { + method: 'POST', + headers: { + 'content-type': 'application/json' + }, + params: { 'api-version': apiVersion }, + ...requestConfig + }; +} diff --git a/packages/gen-ai-hub/src/client/openai/openai-types.ts b/packages/gen-ai-hub/src/client/openai/openai-types.ts index 06e04b0cb..13d432f85 100644 --- a/packages/gen-ai-hub/src/client/openai/openai-types.ts +++ b/packages/gen-ai-hub/src/client/openai/openai-types.ts @@ -1,4 +1,21 @@ -import { BaseLlmParameters } from '@sap-ai-sdk/core'; +/** + * Available OpenAI models for chat completion. + */ +export type OpenAiChatModel = + | 'gpt-4o' + | 'gpt-4' + | 'gpt-4-32k' + | 'gpt-35-turbo' + | 'gpt-35-turbo-0125' + | 'gpt-35-turbo-16k'; + +/** + * OpenAI embedding models. + */ +export type OpenAiEmbeddingModel = + | 'text-embedding-ada-002' + | 'text-embedding-3-small' + | 'text-embedding-3-large'; /** * OpenAI system message. */ @@ -254,8 +271,7 @@ export interface OpenAiCompletionParameters { * OpenAI chat completion input parameters. */ export interface OpenAiChatCompletionParameters - extends OpenAiCompletionParameters, - BaseLlmParameters { + extends OpenAiCompletionParameters { /** * An array of system, user & assistant messages for chat completion. */ @@ -315,7 +331,7 @@ export interface OpenAiChatCompletionParameters /** * OpenAI embedding input parameters.
*/ -export interface OpenAiEmbeddingParameters extends BaseLlmParameters { +export interface OpenAiEmbeddingParameters { /** * Input text to get embeddings for, encoded as a string. The number of input tokens varies depending on what model you are using. Unless you're embedding code, we suggest replacing newlines (\n) in your input with a single space, as we have observed inferior results when newlines are present. */ diff --git a/packages/gen-ai-hub/src/client/types.ts b/packages/gen-ai-hub/src/client/types.ts deleted file mode 100644 index 42aa1caa1..000000000 --- a/packages/gen-ai-hub/src/client/types.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { - OpenAiEmbeddingOutput, - OpenAiChatCompletionOutput -} from './openai/openai-types.js'; - -/** - * Base LLM Output. - */ -export type BaseLlmOutput = OpenAiChatCompletionOutput | OpenAiEmbeddingOutput; diff --git a/packages/gen-ai-hub/src/index.ts b/packages/gen-ai-hub/src/index.ts index 4816a9591..d9ecfe895 100644 --- a/packages/gen-ai-hub/src/index.ts +++ b/packages/gen-ai-hub/src/index.ts @@ -1,17 +1,10 @@ +export * from './client/index.js'; export { - OpenAiClient, - OpenAiChatCompletionParameters, - OpenAiEmbeddingParameters, - OpenAiEmbeddingOutput, - OpenAiChatCompletionOutput -} from './client/index.js'; -export { + OrchestrationClient, + OrchestrationCompletionParameters, + CompletionPostResponse, azureContentFilter, - GenAiHubClient, - GenAiHubCompletionParameters, - GenAiHubCompletionResponse, PromptConfig, LlmConfig, - ChatMessages, - CompletionPostResponse + ChatMessages } from './orchestration/index.js'; diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-client.test.ts b/packages/gen-ai-hub/src/orchestration/orchestration-client.test.ts index c8d5c9f02..7f9e7bdb8 100644 --- a/packages/gen-ai-hub/src/orchestration/orchestration-client.test.ts +++ b/packages/gen-ai-hub/src/orchestration/orchestration-client.test.ts @@ -1,23 +1,19 @@ import nock from 'nock'; -import { BaseLlmParametersWithDeploymentId } from '@sap-ai-sdk/core'; import { mockClientCredentialsGrantCall, mockInference, parseMockResponse } from '../../../../test-util/mock-http.js'; import { CompletionPostResponse } from './client/api/index.js'; -import { GenAiHubCompletionParameters } from './orchestration-types.js'; import { - GenAiHubClient, + OrchestrationClient, constructCompletionPostRequest } from './orchestration-client.js'; import { azureContentFilter } from './orchestration-filter-utility.js'; +import { OrchestrationCompletionParameters } from './orchestration-types.js'; describe('GenAiHubClient', () => { - const client = new GenAiHubClient(); - const deploymentConfiguration: BaseLlmParametersWithDeploymentId = { - deploymentId: 'deployment-id' - }; + const client = new OrchestrationClient(); beforeEach(() => { mockClientCredentialsGrantCall(); @@ -28,8 +24,7 @@ describe('GenAiHubClient', () => { }); it('calls chatCompletion with minimum configuration', async () => { - const request: GenAiHubCompletionParameters = { - deploymentConfiguration, + const request = { llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: { max_tokens: 50, temperature: 0.1 } @@ -46,26 +41,22 @@ describe('GenAiHubClient', () => { mockInference( { - data: { - deploymentConfiguration, - ...constructCompletionPostRequest(request) - } + data: constructCompletionPostRequest(request) }, { data: mockResponse, status: 200 }, { - url: `inference/deployments/${deploymentConfiguration.deploymentId}/completion` + url: 'inference/deployments/1234/completion' } ); - const response = 
await client.chatCompletion(request); + const response = await client.chatCompletion(request, '1234'); expect(response).toEqual(mockResponse); }); it('calls chatCompletion with filter configuration supplied using convenience function', async () => { - const request: GenAiHubCompletionParameters = { - deploymentConfiguration, + const request = { llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: { max_tokens: 50, temperature: 0.1 } @@ -91,26 +82,22 @@ describe('GenAiHubClient', () => { mockInference( { - data: { - deploymentConfiguration, - ...constructCompletionPostRequest(request) - } + data: constructCompletionPostRequest(request) }, { data: mockResponse, status: 200 }, { - url: `inference/deployments/${deploymentConfiguration.deploymentId}/completion` + url: 'inference/deployments/1234/completion' } ); - const response = await client.chatCompletion(request); + const response = await client.chatCompletion(request, '1234'); expect(response).toEqual(mockResponse); }); it('calls chatCompletion with filtering configuration', async () => { - const request: GenAiHubCompletionParameters = { - deploymentConfiguration, + const request = { llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: { max_tokens: 50, temperature: 0.1 } @@ -148,7 +135,7 @@ describe('GenAiHubClient', () => { ] } } - }; + } as OrchestrationCompletionParameters; const mockResponse = parseMockResponse( 'orchestration', 'genaihub-chat-completion-filter-config.json' @@ -156,26 +143,22 @@ describe('GenAiHubClient', () => { mockInference( { - data: { - deploymentConfiguration, - ...constructCompletionPostRequest(request) - } + data: constructCompletionPostRequest(request) }, { data: mockResponse, status: 200 }, { - url: `inference/deployments/${deploymentConfiguration.deploymentId}/completion` + url: 'inference/deployments/1234/completion' } ); - const response = await client.chatCompletion(request); + const response = await client.chatCompletion(request, '1234'); expect(response).toEqual(mockResponse); }); it('sends message history together with templating config', async () => { - const request: GenAiHubCompletionParameters = { - deploymentConfiguration, + const request = { llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: { max_tokens: 50, temperature: 0.1 } @@ -206,20 +189,17 @@ describe('GenAiHubClient', () => { ); mockInference( { - data: { - deploymentConfiguration, - ...constructCompletionPostRequest(request) - } + data: constructCompletionPostRequest(request) }, { data: mockResponse, status: 200 }, { - url: `inference/deployments/${deploymentConfiguration.deploymentId}/completion` + url: 'inference/deployments/1234/completion' } ); - const response = await client.chatCompletion(request); + const response = await client.chatCompletion(request, '1234'); expect(response).toEqual(mockResponse); }); }); diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-client.ts b/packages/gen-ai-hub/src/orchestration/orchestration-client.ts index 7b9c431d7..203b10883 100644 --- a/packages/gen-ai-hub/src/orchestration/orchestration-client.ts +++ b/packages/gen-ai-hub/src/orchestration/orchestration-client.ts @@ -1,34 +1,42 @@ import { executeRequest, CustomRequestConfig } from '@sap-ai-sdk/core'; -import { CompletionPostRequest } from './client/api/schema/index.js'; import { - GenAiHubCompletionParameters, - GenAiHubCompletionResponse -} from './orchestration-types.js'; + DeploymentResolver, + resolveDeployment +} from '../utils/deployment-resolver.js'; +import { + CompletionPostRequest, + 
CompletionPostResponse +} from './client/api/schema/index.js'; +import { OrchestrationCompletionParameters } from './orchestration-types.js'; /** * Get the orchestration client. */ -export class GenAiHubClient { +export class OrchestrationClient { /** * Creates a completion for the chat messages. * @param data - The input parameters for the chat completion. + * @param deploymentResolver - A deployment ID or a function to retrieve it. * @param requestConfig - Request configuration. * @returns The completion result. */ async chatCompletion( - data: GenAiHubCompletionParameters, + data: OrchestrationCompletionParameters, + deploymentResolver: DeploymentResolver = () => + resolveDeployment({ scenarioId: 'orchestration' }), requestConfig?: CustomRequestConfig - ): Promise<GenAiHubCompletionResponse> { - const dataWithInputParams = { - deploymentConfiguration: data.deploymentConfiguration, - ...constructCompletionPostRequest(data) - }; + ): Promise<CompletionPostResponse> { + const body = constructCompletionPostRequest(data); + const deployment = + typeof deploymentResolver === 'function' + ? (await deploymentResolver()).id + : deploymentResolver; const response = await executeRequest( { - url: `/inference/deployments/${data.deploymentConfiguration.deploymentId}/completion` + url: `/inference/deployments/${deployment}/completion` }, - dataWithInputParams, + body, requestConfig ); return response.data; @@ -39,7 +47,7 @@ export class GenAiHubClient { * @internal */ export function constructCompletionPostRequest( - input: GenAiHubCompletionParameters + input: OrchestrationCompletionParameters ): CompletionPostRequest { return { orchestration_config: { diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-completion-post-request.test.ts b/packages/gen-ai-hub/src/orchestration/orchestration-completion-post-request.test.ts index 9fdedd542..919c76684 100644 --- a/packages/gen-ai-hub/src/orchestration/orchestration-completion-post-request.test.ts +++ b/packages/gen-ai-hub/src/orchestration/orchestration-completion-post-request.test.ts @@ -1,29 +1,20 @@ import { CompletionPostRequest } from './client/api/index.js'; import { constructCompletionPostRequest } from './orchestration-client.js'; import { azureContentFilter } from './orchestration-filter-utility.js'; -import { GenAiHubCompletionParameters } from './orchestration-types.js'; +import { OrchestrationCompletionParameters } from './orchestration-types.js'; describe('constructCompletionPostRequest()', () => { - const genaihubCompletionParameters: GenAiHubCompletionParameters = { - deploymentConfiguration: { - deploymentId: 'deployment-id' - }, - llmConfig: { - model_name: 'gpt-35-turbo-16k', - model_params: { max_tokens: 50, temperature: 0.1 } - }, - prompt: { - template: [{ role: 'user', content: 'Hi' }] - } - }; + let input: OrchestrationCompletionParameters; beforeEach(() => { - genaihubCompletionParameters.llmConfig = { - model_name: 'gpt-35-turbo-16k', - model_params: { max_tokens: 50, temperature: 0.1 } - }; - genaihubCompletionParameters.prompt = { - template: [{ role: 'user', content: 'Hi' }] + input = { + llmConfig: { + model_name: 'gpt-35-turbo-16k', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + prompt: { + template: [{ role: 'user', content: 'Hi' }] + } }; }); @@ -42,13 +33,13 @@ describe('constructCompletionPostRequest()', () => { } }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); //
Todo: Adapt the test after Cloud SDK fix for: https://github.com/SAP/cloud-sdk-backlog/issues/1234 it('with model configuration and empty template', async () => { - genaihubCompletionParameters.prompt.template = []; + input.prompt.template = []; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { module_configurations: { @@ -63,12 +54,12 @@ describe('constructCompletionPostRequest()', () => { } }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); it('with model configuration, prompt template and template params', async () => { - genaihubCompletionParameters.prompt = { + input.prompt = { template: [ { role: 'user', @@ -97,12 +88,12 @@ describe('constructCompletionPostRequest()', () => { input_params: { phrase: 'I hate you.', number: 3 } }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); it('with model configuration, prompt template and empty template params', async () => { - genaihubCompletionParameters.prompt = { + input.prompt = { template: [ { role: 'user', @@ -131,16 +122,16 @@ describe('constructCompletionPostRequest()', () => { input_params: {} }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); it('with model name, empty model parameters and prompt template', async () => { - genaihubCompletionParameters.llmConfig = { + input.llmConfig = { model_name: 'gpt-35-turbo-16k', model_params: {} }; - genaihubCompletionParameters.filterConfig = {}; + input.filterConfig = {}; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { module_configurations: { @@ -160,12 +151,12 @@ describe('constructCompletionPostRequest()', () => { } }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); it('with model configuration, prompt template and message history', async () => { - genaihubCompletionParameters.prompt = { + input.prompt = { template: [{ role: 'user', content: "What's my name?" 
}], messages_history: [ { @@ -219,12 +210,12 @@ describe('constructCompletionPostRequest()', () => { ] }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); it('with model configuration, prompt template and filter configuration', async () => { - genaihubCompletionParameters.filterConfig = { + input.filterConfig = { input: azureContentFilter({ Hate: 4, SelfHarm: 0 }) }; const expectedCompletionPostRequest: CompletionPostRequest = { @@ -259,13 +250,13 @@ describe('constructCompletionPostRequest()', () => { } }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); // Todo: Adapt the test after Cloud SDK fix for: https://github.com/SAP/cloud-sdk-backlog/issues/1234 it('with model configuration, prompt template empty filter configuration', async () => { - genaihubCompletionParameters.filterConfig = {}; + input.filterConfig = {}; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { module_configurations: { @@ -285,7 +276,7 @@ describe('constructCompletionPostRequest()', () => { } }; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); }); diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-filter-utility.test.ts b/packages/gen-ai-hub/src/orchestration/orchestration-filter-utility.test.ts index 087837bd0..bcb665ae7 100644 --- a/packages/gen-ai-hub/src/orchestration/orchestration-filter-utility.test.ts +++ b/packages/gen-ai-hub/src/orchestration/orchestration-filter-utility.test.ts @@ -4,13 +4,10 @@ import { } from './client/api/index.js'; import { constructCompletionPostRequest } from './orchestration-client.js'; import { azureContentFilter } from './orchestration-filter-utility.js'; -import { GenAiHubCompletionParameters } from './orchestration-types.js'; +import { OrchestrationCompletionParameters } from './orchestration-types.js'; describe('Filter utility', () => { - const genaihubCompletionParameters: GenAiHubCompletionParameters = { - deploymentConfiguration: { - deploymentId: 'deployment-id' - }, + const input: OrchestrationCompletionParameters = { llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: { max_tokens: 50, temperature: 0.1 } @@ -24,7 +21,7 @@ describe('Filter utility', () => { }; afterEach(() => { - genaihubCompletionParameters.filterConfig = undefined; + input.filterConfig = undefined; }); it('constructs filter configuration with only input', async () => { @@ -44,9 +41,9 @@ describe('Filter utility', () => { ] } }; - genaihubCompletionParameters.filterConfig = filterConfig; + input.filterConfig = filterConfig; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect( completionPostRequest.orchestration_config.module_configurations .filtering_module_config @@ -70,9 +67,9 @@ describe('Filter utility', () => { ] } }; - genaihubCompletionParameters.filterConfig = filterConfig; + input.filterConfig = filterConfig; const completionPostRequest: CompletionPostRequest = - 
constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect( completionPostRequest.orchestration_config.module_configurations .filtering_module_config @@ -115,9 +112,9 @@ describe('Filter utility', () => { ] } }; - genaihubCompletionParameters.filterConfig = filterConfig; + input.filterConfig = filterConfig; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect( completionPostRequest.orchestration_config.module_configurations .filtering_module_config @@ -129,9 +126,9 @@ describe('Filter utility', () => { input: azureContentFilter(), output: azureContentFilter() }; - genaihubCompletionParameters.filterConfig = filterConfig; + input.filterConfig = filterConfig; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); const expectedFilterConfig: FilteringModuleConfig = { input: { filters: [ @@ -156,9 +153,9 @@ describe('Filter utility', () => { it('omits filter configuration if not set', async () => { const filterConfig: FilteringModuleConfig = {}; - genaihubCompletionParameters.filterConfig = filterConfig; + input.filterConfig = filterConfig; const completionPostRequest: CompletionPostRequest = - constructCompletionPostRequest(genaihubCompletionParameters); + constructCompletionPostRequest(input); expect( completionPostRequest.orchestration_config.module_configurations .filtering_module_config diff --git a/packages/gen-ai-hub/src/orchestration/orchestration-types.ts b/packages/gen-ai-hub/src/orchestration/orchestration-types.ts index 74c012c0e..e993be15f 100644 --- a/packages/gen-ai-hub/src/orchestration/orchestration-types.ts +++ b/packages/gen-ai-hub/src/orchestration/orchestration-types.ts @@ -1,22 +1,10 @@ -import { BaseLlmParameters } from '@sap-ai-sdk/core'; import { ChatMessages, - CompletionPostResponse, FilteringModuleConfig, InputParamsEntry, LLMModuleConfig } from './client/api/index.js'; -/** - * Input Parameters for GenAI hub chat completion. - */ -export type GenAiHubCompletionParameters = BaseLlmParameters & - OrchestrationCompletionParameters; - -/** - * Response for GenAI hub chat completion. - */ -export type GenAiHubCompletionResponse = CompletionPostResponse; /** * Wrapper object to configure prompt. 
*/ diff --git a/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts b/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts new file mode 100644 index 000000000..d12288451 --- /dev/null +++ b/packages/gen-ai-hub/src/utils/deployment-resolver.test.ts @@ -0,0 +1,130 @@ +import nock from 'nock'; +import { + mockClientCredentialsGrantCall, + aiCoreDestination +} from '../../../../test-util/mock-http.js'; +import { resolveDeployment } from './deployment-resolver.js'; + +describe('Deployment resolver', () => { + beforeEach(() => { + mockClientCredentialsGrantCall(); + }); + + afterEach(() => { + nock.cleanAll(); + }); + + describe('should look up the deployment ID based on a scenario', () => { + beforeEach(() => { + mockResponse(); + }); + it('should return the first deployment, if multiple are given', async () => { + const { id, configurationId } = await resolveDeployment({ + scenarioId: 'foundation-models' + }); + expect(id).toBe('1'); + expect(configurationId).toBe('c1'); + }); + it('should return the deployment with the correct model name', async () => { + const { id, configurationId } = await resolveDeployment({ + scenarioId: 'foundation-models', + model: { name: 'gpt-4o' } + }); + expect(id).toBe('2'); + expect(configurationId).toBe('c2'); + }); + it('should return the deployment with the correct model name and version', async () => { + const { id, configurationId } = await resolveDeployment({ + scenarioId: 'foundation-models', + model: { name: 'gpt-4o', version: '0613' } + }); + expect(id).toBe('2'); + expect(configurationId).toBe('c2'); + }); + it('should throw in case no deployment with the given model name is found', async () => { + await expect( + resolveDeployment({ + scenarioId: 'foundation-models', + model: { name: 'not existing' } + }) + ).rejects.toThrow('No deployment matched the given criteria'); + }); + it('should throw in case no deployment with the given model version is found', async () => { + await expect( + resolveDeployment({ + scenarioId: 'foundation-models', + model: { name: 'gpt-4o', version: 'not existing' } + }) + ).rejects.toThrow('No deployment matched the given criteria'); + }); + }); + + it('should throw on empty list', async () => { + nock(aiCoreDestination.url, { + reqheaders: { + 'ai-resource-group': 'default' + } + }) + .get('/v2/lm/deployments') + .query({ scenarioId: 'foundation-models', status: 'RUNNING' }) + .reply(200, { + count: 0, + resources: [] + }); + + await expect( + resolveDeployment({ scenarioId: 'foundation-models' }) + ).rejects.toThrow('No deployment matched the given criteria'); + }); +}); + +function mockResponse() { + nock(aiCoreDestination.url, { + reqheaders: { + 'ai-resource-group': 'default' + } + }) + .get('/v2/lm/deployments') + .query({ scenarioId: 'foundation-models', status: 'RUNNING' }) + .reply(200, { + count: 2, + resources: [ + { + configurationId: 'c1', + id: '1', + deploymentUrl: 'https://foo.com/v2/inference/deployments/1', + details: { + resources: { + backend_details: { + model: { + name: 'gpt-4-32k', + version: 'latest' + } + } + }, + scaling: { + backend_details: {} + } + }, + lastOperation: 'CREATE', + status: 'RUNNING' + }, + { + configurationId: 'c2', + id: '2', + deploymentUrl: 'https://foo.com/v2/inference/deployments/2', + details: { + resources: { + backend_details: { + model: { + name: 'gpt-4o', + version: '0613' + } + } + } + }, + status: 'RUNNING' + } + ] + }); } diff --git a/packages/gen-ai-hub/src/utils/deployment-resolver.ts b/packages/gen-ai-hub/src/utils/deployment-resolver.ts new file mode 100644 index
000000000..a2c745512 --- /dev/null +++ b/packages/gen-ai-hub/src/utils/deployment-resolver.ts @@ -0,0 +1,82 @@ +import { + DeploymentApi, + AiDeployment, + AiDeploymentStatus +} from '@sap-ai-sdk/ai-core'; + +/** + * A deployment resolver can be either a deployment ID or a function that returns a full deployment object. + */ +export type DeploymentResolver = DeploymentId | (() => Promise<AiDeployment>); +/** + * A deployment ID is a string that uniquely identifies a deployment. + */ +export type DeploymentId = string; +/** + * A foundation model is identified by its name and optionally a version. + */ +export interface FoundationModel { + /** + * The name of the model. + */ + name: string; + /** + * The version of the model. + */ + version?: string; +} + +/** + * Query the AI Core service for a deployment that matches the given criteria. If more than one deployment matches the criteria, the first one is returned. + * @param opts - The options for the deployment resolution. + * @param opts.scenarioId - The scenario ID of the deployment. + * @param opts.executableId - The executable ID of the deployment. + * @param opts.model - The name and optionally the version of the model to look for. + * @returns An AiDeployment if a matching deployment was found; fails otherwise. + */ +export async function resolveDeployment(opts: { + scenarioId: string; + executableId?: string; + model?: FoundationModel; +}): Promise<AiDeployment> { + const query = { + scenarioId: opts.scenarioId, + status: 'RUNNING' as AiDeploymentStatus, + ...(opts.executableId && { executableIds: [opts.executableId] }) + }; + + // TODO: add a cache: https://github.tools.sap/AI/gen-ai-hub-sdk-js-backlog/issues/78 + let deploymentList: AiDeployment[]; + const { deploymentQuery } = DeploymentApi; + const resourceGroup = { 'AI-Resource-Group': 'default' }; + try { + deploymentList = (await deploymentQuery(query, resourceGroup).execute()) + .resources; + } catch (error) { + throw new Error('Failed to fetch the list of deployments: ' + error); + } + + if (opts.model) { + const modelName = opts.model.name; + deploymentList = deploymentList.filter( + deployment => extractModel(deployment)?.name === modelName + ); + if (opts.model.version) { + const modelVersion = opts.model.version; + // feature idea: smart handling of 'latest' version: treat 'latest' and the highest version number as the same + deploymentList = deploymentList.filter( + deployment => extractModel(deployment)?.version === modelVersion + ); + } + } + + if (!deploymentList.length) { + throw new Error( + 'No deployment matched the given criteria: ' + JSON.stringify(opts) + ); + } + return deploymentList[0]; +} + +const extractModel = (deployment: AiDeployment) => + deployment.details?.resources?.backend_details?.model; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f95ebfc19..ba71cee39 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -94,6 +94,9 @@ importers: packages/gen-ai-hub: dependencies: + '@sap-ai-sdk/ai-core': + specifier: workspace:^ + version: link:../ai-core '@sap-ai-sdk/core': specifier: workspace:^ version: link:../core diff --git a/sample-code/src/aiservice.ts b/sample-code/src/aiservice.ts index 36f0543f8..b161682f7 100644 --- a/sample-code/src/aiservice.ts +++ b/sample-code/src/aiservice.ts @@ -1,45 +1,30 @@ -import { OpenAiClient } from '@sap-ai-sdk/gen-ai-hub'; +import { + OpenAiClient, + OpenAiChatAssistantMessage +} from '@sap-ai-sdk/gen-ai-hub'; const openAiClient = new OpenAiClient(); -const deployments: { [model: string]: string } = { - 'gpt-4-32k': 'd577d927380c98ea', - 'gpt-35-turbo':
'd66d1927bf590375', - ada: 'd0084a63ebd7bcd3' -}; - /** * Ask GPT about the capital of France. * @returns The answer from GPT. */ -export function chatCompletion(): Promise<string> { - const config = getConfig('gpt-35-turbo'); - return openAiClient - .chatCompletion({ - ...config, - messages: [{ role: 'user', content: 'What is the capital of France?' }] - }) - .then(response => response.choices[0].message.content); +export async function chatCompletion(): Promise<string> { + const response = await openAiClient.chatCompletion('gpt-35-turbo', { + messages: [{ role: 'user', content: 'What is the capital of France?' }] + }); + const assistantMessage = response.choices[0] + .message as OpenAiChatAssistantMessage; + return assistantMessage.content!; } /** * Embed 'Hello, world!' using the OpenAI ADA model. * @returns An embedding vector. */ -export function computeEmbedding(): Promise<number[]> { - const config = getConfig('ada'); - return openAiClient - .embeddings({ - ...config, - input: 'Hello, world!' - }) - .then(response => response.data[0].embedding); -} - -function getConfig(model: string) { - return { - deploymentConfiguration: { - deploymentId: deployments[model] - } - }; +export async function computeEmbedding(): Promise<number[]> { + const response = await openAiClient.embeddings('text-embedding-ada-002', { + input: 'Hello, world!' + }); + return response.data[0].embedding; } diff --git a/test-util/mock-http.ts b/test-util/mock-http.ts index 744d26c0c..045667b14 100644 --- a/test-util/mock-http.ts +++ b/test-util/mock-http.ts @@ -88,9 +88,9 @@ export function mockClientCredentialsGrantCall( .reply(responseCode, response); } -export function mockInference<D extends BaseLlmParameters>( +export function mockInference( request: { - data: D; + data: any; requestConfig?: CustomRequestConfig; }, response: { @@ -99,7 +99,6 @@ }, endpoint: EndpointOptions = mockEndpoint ): nock.Scope { - const { deploymentConfiguration, ...body } = request.data; const { url, apiVersion } = endpoint; const destination = getMockedAiCoreDestination(); return nock(destination.url, { @@ -107,9 +106,9 @@ 'ai-resource-group': 'default', authorization: `Bearer ${destination.authTokens?.[0].value}` } - }).post( - `/v2/${url}`, - body as any + }) + .post(`/v2/${url}`, + request.data ) .query(apiVersion ?
{ 'api-version': apiVersion } : {}) .reply(response.status, response.data); diff --git a/tests/e2e-tests/src/orchestration.test.ts b/tests/e2e-tests/src/orchestration.test.ts index 56b34b67f..f25f3e739 100644 --- a/tests/e2e-tests/src/orchestration.test.ts +++ b/tests/e2e-tests/src/orchestration.test.ts @@ -2,8 +2,8 @@ import path from 'path'; import { fileURLToPath } from 'url'; import dotenv from 'dotenv'; import { - GenAiHubClient, - GenAiHubCompletionParameters + OrchestrationClient, + OrchestrationCompletionParameters } from '@sap-ai-sdk/gen-ai-hub'; // Pick .env file from root directory @@ -13,8 +13,7 @@ dotenv.config({ path: path.resolve(__dirname, '../.env') }); describe('orchestration', () => { it('should complete a chat', async () => { - const request: GenAiHubCompletionParameters = { - deploymentConfiguration: { deploymentId: 'db1d64d9f06be467' }, + const request: OrchestrationCompletionParameters = { llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: { max_tokens: 50, temperature: 0.1 } @@ -34,7 +33,7 @@ } } }; - const response = await new GenAiHubClient().chatCompletion(request); + const response = await new OrchestrationClient().chatCompletion(request); expect(response.module_results).toBeDefined(); expect(response.module_results.templating).not.toHaveLength(0); diff --git a/tests/type-tests/test/context.test-d.ts b/tests/type-tests/test/context.test-d.ts index aa967764e..431bf85c1 100644 --- a/tests/type-tests/test/context.test-d.ts +++ b/tests/type-tests/test/context.test-d.ts @@ -1,5 +1,5 @@ -import { Destination } from '@sap-cloud-sdk/connectivity'; +import { HttpDestination } from '@sap-cloud-sdk/connectivity'; import { expectType } from 'tsd'; import { getAiCoreDestination } from '@sap-ai-sdk/core'; -expectType<Promise<Destination>>(getAiCoreDestination()); +expectType<Promise<HttpDestination>>(getAiCoreDestination()); diff --git a/tests/type-tests/test/http-client.test-d.ts b/tests/type-tests/test/http-client.test-d.ts index 918a9f65f..bbb3d5ec8 100644 --- a/tests/type-tests/test/http-client.test-d.ts +++ b/tests/type-tests/test/http-client.test-d.ts @@ -5,21 +5,8 @@ import { executeRequest } from '@sap-ai-sdk/core'; expectType<Promise<HttpResponse>>( executeRequest( { url: 'https://example.com', apiVersion: 'v1' }, - { deploymentConfiguration: { deploymentId: 'id' }, prompt: 'test prompt' }, { headers: { 'Content-Type': 'application/json' } } ) ); -expectError( - executeRequest( - { url: 'https://example.com', apiVersion: 'v1' }, - { prompt: 'test prompt' } - ) -); - -expectError( - executeRequest( - {}, - { deploymentConfiguration: { deploymentId: 'id' }, prompt: 'test prompt' } - ) -); +expectError(executeRequest({}, { prompt: 'test prompt' })); diff --git a/tests/type-tests/test/openai.test-d.ts b/tests/type-tests/test/openai.test-d.ts index 443662e65..e39c30da4 100644 --- a/tests/type-tests/test/openai.test-d.ts +++ b/tests/type-tests/test/openai.test-d.ts @@ -12,14 +12,7 @@ expectType<OpenAiClient>(client); /** * Chat Completion. */ expectType<Promise<OpenAiChatCompletionOutput>>( - client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' }, - messages: [{ role: 'user', content: 'test prompt' }] - }) -); - -expectError( - client.chatCompletion({ + client.chatCompletion('gpt-4', { messages: [{ role: 'user', content: 'test prompt' }] }) ); @@ -28,10 +21,9 @@ expectError( * Embeddings.
*/ expectType<Promise<OpenAiEmbeddingOutput>>( - client.embeddings({ - deploymentConfiguration: { deploymentId: 'id' }, + client.embeddings('text-embedding-ada-002', { input: 'test input' }) ); -expectError(client.embeddings({ input: 'test input' })); +expectError(client.embeddings('gpt-35-turbo', { input: 'test input' })); diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 82cb0dd16..c85042848 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,15 +1,17 @@ import { expectError, expectType } from 'tsd'; -import { GenAiHubClient, CompletionPostResponse } from '@sap-ai-sdk/gen-ai-hub'; +import { + OrchestrationClient, + CompletionPostResponse +} from '@sap-ai-sdk/gen-ai-hub'; -const client = new GenAiHubClient(); -expectType<GenAiHubClient>(client); +const client = new OrchestrationClient(); +expectType<OrchestrationClient>(client); /** * Chat Completion. */ expectType<Promise<CompletionPostResponse>>( client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' }, prompt: { template: [{ role: 'user', content: 'Hello!' }] }, @@ -25,7 +27,6 @@ expectType<Promise<CompletionPostResponse>>( */ expectType<Promise<CompletionPostResponse>>( client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' }, prompt: { template: [{ role: 'user', content: 'Hello!' }], messages_history: [ @@ -47,36 +48,16 @@ expectType<Promise<CompletionPostResponse>>( }) ); -/** - * Deployment details are mandatory. - */ -expectError<Promise<CompletionPostResponse>>( - client.chatCompletion({ - prompt: { - template: [{ role: 'user', content: 'Hello!' }] - }, - llmConfig: { - model_name: 'gpt-35-turbo-16k', - model_params: {} - } - }) -); - /** * Orchestration completion parameters cannot be empty. */ -expectError( - client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' } - }) -); +expectError(client.chatCompletion({})); /** * Prompt templates cannot be empty. */ expectError( client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' }, llmConfig: { model_name: 'gpt-35-turbo-16k', model_params: {} @@ -89,7 +70,6 @@ expectError( */ expectError( client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' }, prompt: { template: [{ role: 'user', content: 'Hello!' }] }, @@ -104,7 +84,6 @@ */ expectType<Promise<CompletionPostResponse>>( client.chatCompletion({ - deploymentConfiguration: { deploymentId: 'id' }, prompt: { template: [{ role: 'user', content: 'Hello!' }] },
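
Note for reviewers: a minimal usage sketch of the reworked client API introduced in this change set, pieced together from the updated sample code and unit tests. The deployment ID '1234' is a placeholder taken from the tests, and top-level await plus an AI Core service binding are assumed.

import { OpenAiClient, OrchestrationClient } from '@sap-ai-sdk/gen-ai-hub';

const openAiClient = new OpenAiClient();

// Without a deployment ID, the client queries AI Core for a RUNNING
// deployment of the given model in the 'foundation-models' scenario.
const chatResponse = await openAiClient.chatCompletion('gpt-35-turbo', {
  messages: [{ role: 'user', content: 'What is the capital of France?' }]
});

// Passing a deployment ID as the third argument skips the lookup.
const embeddingResponse = await openAiClient.embeddings(
  'text-embedding-ada-002',
  { input: 'Hello, world!' },
  '1234'
);

// The orchestration client resolves a deployment for the 'orchestration'
// scenario by default; a deployment ID can be passed as the second argument.
const completion = await new OrchestrationClient().chatCompletion({
  llmConfig: {
    model_name: 'gpt-35-turbo-16k',
    model_params: { max_tokens: 50, temperature: 0.1 }
  },
  prompt: { template: [{ role: 'user', content: 'Hello!' }] }
});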