diff --git a/.changeset/curly-peaches-wonder.md b/.changeset/curly-peaches-wonder.md new file mode 100644 index 00000000..83f8f14e --- /dev/null +++ b/.changeset/curly-peaches-wonder.md @@ -0,0 +1,7 @@ +--- +"@llm-ui/json": patch +--- + +Multiple JSON blocks bug fixes + +Sometimes two JSON blocks of different types would not be found properly. This has been fixed. diff --git a/packages/csv/src/matchers.test.ts b/packages/csv/src/matchers.test.ts index 797e01f8..7bce7a87 100644 --- a/packages/csv/src/matchers.test.ts +++ b/packages/csv/src/matchers.test.ts @@ -81,6 +81,36 @@ describe("findCompleteCsvBlock", () => { outputRaw: "⦅t,a,b,c⦆", }, }, + { + name: "two same type blocks", + input: "⦅t,a,b,c⦆⦅t,a,b,c⦆", + options: { type: "t" }, + expected: { + startIndex: 0, + endIndex: 9, + outputRaw: "⦅t,a,b,c⦆", + }, + }, + { + name: "two different blocks", + input: "⦅t,a,b,c⦆⦅z,a,b,c⦆", + options: { type: "t" }, + expected: { + startIndex: 0, + endIndex: 9, + outputRaw: "⦅t,a,b,c⦆", + }, + }, + { + name: "two different blocks reversed", + input: "⦅z,a,b,c⦆⦅t,a,b,c⦆", + options: { type: "t" }, + expected: { + startIndex: 9, + endIndex: 18, + outputRaw: "⦅t,a,b,c⦆", + }, + }, { name: "not a block", input: "```\nhello\n```", diff --git a/packages/json/src/matchers.test.ts b/packages/json/src/matchers.test.ts index 3da0b5ed..1f75b5b1 100644 --- a/packages/json/src/matchers.test.ts +++ b/packages/json/src/matchers.test.ts @@ -62,6 +62,36 @@ describe("findCompleteJsonBlock", () => { outputRaw: '【{type:"buttons"}】', }, }, + { + name: "full custom same component twice", + input: '【{type:"buttons"}】【{type:"buttons"}】', + options: { type: "buttons" }, + expected: { + startIndex: 0, + endIndex: 18, + outputRaw: '【{type:"buttons"}】', + }, + }, + { + name: "full custom 2 different components", + input: '【{type:"buttons"}】【{type:"somethingelse"}】', + options: { type: "buttons" }, + expected: { + startIndex: 0, + endIndex: 18, + outputRaw: '【{type:"buttons"}】', + }, + }, + { + name: "full custom 2 different components reversed", + input: '【{type:"somethingelse"}】【{type:"buttons"}】', + options: { type: "buttons" }, + expected: { + startIndex: 24, + endIndex: 42, + outputRaw: '【{type:"buttons"}】', + }, + }, { name: "full custom component with fields", input: '【{type:"buttons", something: "something", else: "else"}】', diff --git a/packages/json/src/matchers.ts b/packages/json/src/matchers.ts index 1d6f7865..3138c1ce 100644 --- a/packages/json/src/matchers.ts +++ b/packages/json/src/matchers.ts @@ -1,5 +1,5 @@ import { LLMOutputMatcher } from "@llm-ui/react"; -import { regexMatcher, removeStartEndChars } from "@llm-ui/shared"; +import { regexMatcherGlobal, removeStartEndChars } from "@llm-ui/shared"; import { JsonBlockOptions, JsonBlockOptionsComplete, @@ -12,18 +12,20 @@ const findJsonBlock = ( options: JsonBlockOptionsComplete, ): LLMOutputMatcher => { const { type } = options; - const matcher = regexMatcher(regex); + const matcher = regexMatcherGlobal(regex); return (llmOutput: string) => { - const match = matcher(llmOutput); - if (!match) { + const matches = matcher(llmOutput); + if (matches.length === 0) { return undefined; } - const block = parseJson5(removeStartEndChars(match.outputRaw, options)); + return matches.find((match) => { + const block = parseJson5(removeStartEndChars(match.outputRaw, options)); - if (!block || block[options.typeKey] !== type) { - return undefined; - } - return match; + if (!block || block[options.typeKey] !== type) { + return undefined; + } + return match; + }); }; }; @@ -32,7 +34,7 @@ export const findCompleteJsonBlock = ( ): LLMOutputMatcher => { const options = getOptions(userOptions); const { startChar, endChar } = options; - const regex = new RegExp(`${startChar}([\\s\\S]*?)${endChar}`); + const regex = new RegExp(`${startChar}([\\s\\S]*?)${endChar}`, "g"); return findJsonBlock(regex, options); }; @@ -41,6 +43,6 @@ export const findPartialJsonBlock = ( ): LLMOutputMatcher => { const options = getOptions(userOptions); const { startChar } = options; - const regex = new RegExp(`${startChar}([\\s\\S]*)`); + const regex = new RegExp(`${startChar}([\\s\\S]*)`, "g"); return findJsonBlock(regex, options); }; diff --git a/packages/shared/src/index.ts b/packages/shared/src/index.ts index 8fb85497..39d0cfac 100644 --- a/packages/shared/src/index.ts +++ b/packages/shared/src/index.ts @@ -1,2 +1,2 @@ -export { regexMatcher } from "./regexMatcher"; +export { regexMatcher, regexMatcherGlobal } from "./regexMatcher"; export { removeStartEndChars } from "./removeStartEndChars"; diff --git a/packages/shared/src/regexMatcher.test.ts b/packages/shared/src/regexMatcher.test.ts index b7821717..234fe916 100644 --- a/packages/shared/src/regexMatcher.test.ts +++ b/packages/shared/src/regexMatcher.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest"; -import { regexMatcher } from "./regexMatcher"; +import { regexMatcher, regexMatcherGlobal } from "./regexMatcher"; describe("regexMatcher", () => { const testCases = [ @@ -44,3 +44,69 @@ describe("regexMatcher", () => { }); }); }); + +describe("regexMatcherGlobal", () => { + const testCases = [ + { + input: "hello", + regex: /hello/g, + expected: [ + { + startIndex: 0, + endIndex: 5, + outputRaw: "hello", + }, + ], + }, + { + input: "abc hello", + regex: /hello/g, + expected: [ + { + startIndex: 4, + endIndex: 9, + outputRaw: "hello", + }, + ], + }, + { + input: "abc hello def", + regex: /hello/g, + expected: [ + { + startIndex: 4, + endIndex: 9, + outputRaw: "hello", + }, + ], + }, + { + input: "abc hello def hello", + regex: /hello/g, + expected: [ + { + startIndex: 4, + endIndex: 9, + outputRaw: "hello", + }, + { + startIndex: 14, + endIndex: 19, + outputRaw: "hello", + }, + ], + }, + { + input: "abc yellow def", + regex: /hello/g, + expected: [], + }, + ]; + + testCases.forEach(({ input, regex, expected }) => { + it(`should match ${input} with ${regex}`, () => { + const result = regexMatcherGlobal(regex)(input); + expect(result).toEqual(expected); + }); + }); +}); diff --git a/packages/shared/src/regexMatcher.ts b/packages/shared/src/regexMatcher.ts index 340c7c45..de9122b4 100644 --- a/packages/shared/src/regexMatcher.ts +++ b/packages/shared/src/regexMatcher.ts @@ -1,18 +1,41 @@ -import { MaybeLLMOutputMatch } from "@llm-ui/react"; +import { LLMOutputMatch, MaybeLLMOutputMatch } from "@llm-ui/react"; + +const regexMatchToLLmOutputMatch = ( + regexMatch: RegExpMatchArray | null, +): MaybeLLMOutputMatch => { + if (regexMatch) { + const matchString = regexMatch[0]; + const startIndex = regexMatch.index!; + const endIndex = startIndex + matchString.length; + return { + startIndex, + endIndex, + outputRaw: matchString, + }; + } + return undefined; +}; export const regexMatcher = (regex: RegExp) => (llmOutput: string): MaybeLLMOutputMatch => { - const regexMatch = llmOutput.match(regex); - if (regexMatch) { - const matchString = regexMatch[0]; - const startIndex = regexMatch.index!; - const endIndex = startIndex + matchString.length; - return { - startIndex, - endIndex, - outputRaw: matchString, - }; + if (regex.global) { + throw new Error("regexMatcher does not support global regexes"); + } + return regexMatchToLLmOutputMatch(llmOutput.match(regex)); + }; + +export const regexMatcherGlobal = + (regex: RegExp) => + (llmOutput: string): LLMOutputMatch[] => { + if (!regex.global) { + throw new Error("regexMatcherGlobal does not support non-global regexes"); + } + const matches = Array.from(llmOutput.matchAll(regex)); + if (!matches) { + return []; } - return undefined; + return matches + .map((m) => regexMatchToLLmOutputMatch(m)) + .filter((m) => m !== undefined) as LLMOutputMatch[]; };