Commit
fix(generation): decrease window size and make it configurable (#39)
Signed-off-by: Tomas Pilar <tomas.pilar@ibm.com>
pilartomas authored Mar 25, 2024
1 parent d143f99 commit 84fb85a
Showing 2 changed files with 8 additions and 5 deletions.
package.json (2 changes: 1 addition & 1 deletion)
@@ -1,6 +1,6 @@
 {
   "name": "@ibm-generative-ai/cli",
-  "version": "2.0.3",
+  "version": "2.0.4",
   "description": "CLI for IBM Generative AI (Tech Preview)",
   "keywords": [
     "ai",
src/commands/text/generation/create.js (11 changes: 7 additions & 4 deletions)
@@ -7,8 +7,6 @@ import { clientMiddleware } from "../../../middleware/client.js";

 import { generationConfig, generationMiddleware } from "./index.js";
 
-const REQUEST_LIMIT = 1000; // We want to have reasonable memory footprint while maintaining full performance
-
 export const createCommandDefinition = [
   ["create [inputs..]"],
   "Generate text based on input(s)",
@@ -28,6 +26,11 @@ export const createCommandDefinition = [
        description: "Continue even if generation fails for an input",
        default: false,
      },
+      "window": {
+        type: "number",
+        description: "Maximum number of inputs to process concurrently",
+        default: 10, // Concurrency limit is the sweet point, once SDK supports array of inputs, this will no longer be necessary
+      },
    })
  ),
  async (args) => {
@@ -37,7 +40,7 @@ export const createCommandDefinition = [
       ? Readable.from(inlineInputs)
       : createInputStream(stdin);
 
-    const { model, parameters, allowErrors } = args;
+    const { model, parameters, allowErrors, window } = args;
 
     const requests = [];
     const consume = async (request) => {
@@ -55,7 +58,7 @@ export const createCommandDefinition = [
     // Produce requests
     for await (const input of inputStream) {
      // If limit has been reached, consume the oldest request first
-      if (requests.length >= REQUEST_LIMIT) await consume(requests.shift());
+      if (requests.length >= window) await consume(requests.shift());
      requests.push(
        args.client.text.generation.create(
          {
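For context, the change replaces the fixed REQUEST_LIMIT of 1000 with a configurable window (default 10) that bounds how many generation requests are in flight at once. Below is a minimal, self-contained sketch of that windowing pattern; the inputs() generator and generate() function are placeholders standing in for the real input stream and args.client.text.generation.create(), and are not part of the commit.

// Sketch of the windowed-concurrency pattern shown in the diff above.
// Assumptions: inputs() simulates the input stream and generate() stands in
// for args.client.text.generation.create(); only the windowing logic mirrors the commit.

async function* inputs() {
  for (let i = 0; i < 25; i++) yield `input ${i}`;
}

const generate = async (input) => {
  // Placeholder for the real SDK call
  await new Promise((resolve) => setTimeout(resolve, 100));
  return `output for "${input}"`;
};

const run = async (window = 10) => {
  const requests = [];
  const consume = async (request) => {
    // Await the oldest in-flight request and print its result, preserving input order
    console.log(await request);
  };

  for await (const input of inputs()) {
    // Keep at most `window` requests in flight to bound memory usage
    if (requests.length >= window) await consume(requests.shift());
    requests.push(generate(input));
  }
  // Drain the remaining in-flight requests after the input stream ends
  for (const request of requests) await consume(request);
};

await run(10);

Lowering the default from 1000 to 10 trades some pipelining headroom for a much smaller memory footprint, and the new window option lets users raise the limit again when throughput matters more than memory.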
