Skip to content

Commit

Permalink
inference server
Browse files: browse the repository at this point in the history
  • Loading branch information
aj-ya committed Sep 29, 2023
1 parent 7e201f4 commit 9aeaa44
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 10 deletions.
5 changes: 5 additions & 0 deletions .changeset/happy-lobsters-tan.md
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
---
'outpostkit': patch
---

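Inference server: stream from the inference server's own `/generate` endpoint instead of the comet prompt API, always send `stream: true` in the request body, and pass each decoded chunk to `handleNewChunk`.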
18 changes: 8 additions & 10 deletions src/utils/inference/vllm-stream.ts
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,19 @@
import { API_V1_URL } from '../../constants';
import { PromptPayload } from 'types';
import { EventStreamContentType, fetchEventSource } from '@microsoft/fetch-event-source';
import { APIError } from 'error';
import { VLLMPromptParameters } from 'types/inference';

export const streamGenericInferenceServer = (
cometId: string,
apiKey: string,
payload: PromptPayload,
domain: string,
payload: { prompt: string } & VLLMPromptParameters,
handleNewChunk?: (chunk: string) => void | Promise<void>
) => {
return new Promise<string>((resolve, reject) => {
(async () => {
const response = await fetch(`${API_V1_URL}/comets/${cometId}/prompt`, {
const response = await fetch(`${domain}/generate`, {
method: 'POST',
body: JSON.stringify(payload),
body: JSON.stringify({ ...payload, stream: true }),
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${apiKey}`,
Accept: 'text/plain, application/json',
},
});
Expand All @@ -33,6 +30,7 @@ export const streamGenericInferenceServer = (
let chunk = textDecoder.decode(value);
responseText += chunk;
console.log('chunk:', chunk);
handleNewChunk(chunk);
}
if (done) {
try {
Expand Down Expand Up @@ -91,7 +89,7 @@ class ClientError extends Error {}
// class FatalError extends Error {}

export const streamOpenAIInferenceServer = async (
payload: PromptPayload,
payload: { prompt: string; model: string } & VLLMPromptParameters,
domain: string,
type: 'chat' | 'text',
handleNewChunk?: (chunk: string) => void | Promise<void>
Expand All @@ -104,7 +102,7 @@ export const streamOpenAIInferenceServer = async (
'Content-Type': 'application/json',
Accept: 'text/event-stream',
},
body: JSON.stringify(payload),
body: JSON.stringify({ ...payload, stream: true }),
async onopen(response) {
const contentType = response.headers.get('content-type');
if (response.ok && contentType.includes(EventStreamContentType)) {
Expand Down

0 comments on commit 9aeaa44

Please sign in to comment.