Skip to content

Commit

Permalink
feat: vLLM with bitsandbytes, exponential backoffs in bench
Browse files Browse the repository at this point in the history
  • Loading branch information
av committed Sep 12, 2024
1 parent 876b722 commit de1ee3a
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 5 deletions.
22 changes: 21 additions & 1 deletion bench/src/llm.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { config } from "./config.ts";
import { omit } from './utils.ts';
import { omit, sleep } from './utils.ts';

export type LLMOptions = {
maxTokens?: number;
Expand All @@ -21,6 +21,26 @@ export class LLM {
}

async chat(message: string, options = {}): Promise<string> {
const maxRetries = 4;
let retries = 0;

while (retries < maxRetries) {
try {
return await this.attemptChat(message, options);
} catch (error) {
retries++;
if (retries >= maxRetries) {
throw error;
}
console.warn(`Attempt ${retries} failed. Retrying in ${2 ** retries} seconds...`);
await sleep(2 ** retries * 1000); // Exponential backoff
}
}

throw new Error('Max retries reached');
}

private async attemptChat(message: string, options = {}): Promise<string> {
const completionOptions = {
...(this.llm?.options || {}),
...options,
Expand Down
4 changes: 3 additions & 1 deletion bench/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,6 @@ export function parseArgs(args: string[]) {

return acc;
}, {} as Record<string, string | string[] | boolean>);
}
}

/**
 * Promise-based delay helper: resolves after `ms` milliseconds have elapsed.
 *
 * @param ms - Delay duration in milliseconds.
 * @returns A promise that settles once the timeout fires.
 */
export function sleep(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
6 changes: 5 additions & 1 deletion compose.vllm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@ services:
vllm:
container_name: ${HARBOR_CONTAINER_PREFIX}.vllm
env_file: ./.env
image: vllm/vllm-openai:${HARBOR_VLLM_VERSION}
build:
context: ./vllm
dockerfile: Dockerfile
args:
HARBOR_VLLM_VERSION: ${HARBOR_VLLM_VERSION}
ports:
- ${HARBOR_VLLM_HOST_PORT}:8000
ipc: host
Expand Down
4 changes: 2 additions & 2 deletions http-catalog/vllm.http
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ curl {{host}}/v1/chat/completions -H 'Content-Type: application/json' -d '{
###

curl {{host}}/v1/chat/completions -H 'Content-Type: application/json' -d '{
"model": "google/gemma-2-2b-it",
"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
"messages": [
{
"role": "user",
"content": "Answer in one word. Where is Paris?"
"content": "Answer in one word starting with a letter 'A'. Where is Paris?"
}
],
"temperature": 0.2
Expand Down
5 changes: 5 additions & 0 deletions vllm/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Base image tag is injected by compose via the HARBOR_VLLM_VERSION build arg
# (defaults to "latest" when built standalone).
ARG HARBOR_VLLM_VERSION=latest
FROM vllm/vllm-openai:${HARBOR_VLLM_VERSION}

# Install bitsandbytes for additional quantization support.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir bitsandbytes

0 comments on commit de1ee3a

Please sign in to comment.