From d8a612f3a3ddcf583e09252990e2026272f6a88c Mon Sep 17 00:00:00 2001
From: Stainless Bot
Date: Wed, 6 Mar 2024 16:42:28 +0000
Subject: [PATCH] fix(streaming): correctly handle trailing new lines in byte chunks

---
Notes (review commentary placed between the three-dash separator and the
diffstat, so it is not part of the commit message or of any hunk):

  * src/streaming.ts, first hunk: `class LineDecoder` becomes
    `export class LineDecoder`. The new test file imports it by name from
    'openai/streaming'.

  * src/streaming.ts, second hunk: `trailingNewline` is true when the last
    character of `text` is in `LineDecoder.NEWLINE_CHARS`. In that case
    `text.split(LineDecoder.NEWLINE_REGEXP)` ends with an empty string, and
    the added block pops that entry before the existing
    `lines.length === 1 && !trailingNewline` buffering check runs.
    The enclosing method is not visible in the hunk context; the tests call
    `decoder.decode(chunk)`, so this is presumably inside `decode` -- confirm
    against src/streaming.ts.

  * tests/streaming.test.ts (new): `decodeChunks` feeds each chunk to one
    decoder (a fresh `LineDecoder` unless one is passed in) and concatenates
    the returned lines. Six cases: a line completed across two chunks with
    "\n" and with "\r\n" (the unterminated tail "baz" is not returned),
    chunks that end exactly on a newline, and input containing a literal
    backslash followed by "n" / "r", which is expected to come back
    unchanged rather than be treated as a line break.

  * NOTE(review): no test splits a "\r\n" pair across two chunks (one chunk
    ending in "\r", the next starting with "\n"). Whether that case is
    handled cannot be determined from these hunks -- worth confirming.

 src/streaming.ts        |  8 +++++++-
 tests/streaming.test.ts | 42 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 tests/streaming.test.ts

diff --git a/src/streaming.ts b/src/streaming.ts
index 7d8b4442a..1b59bce20 100644
--- a/src/streaming.ts
+++ b/src/streaming.ts
@@ -267,7 +267,7 @@ class SSEDecoder {
  *
  * https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258
  */
-class LineDecoder {
+export class LineDecoder {
   // prettier-ignore
   static NEWLINE_CHARS = new Set(['\n', '\r', '\x0b', '\x0c', '\x1c', '\x1d', '\x1e', '\x85', '\u2028', '\u2029']);
   static NEWLINE_REGEXP = /\r\n|[\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029]/g;
@@ -300,6 +300,12 @@ class LineDecoder {
     const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text[text.length - 1] || '');
     let lines = text.split(LineDecoder.NEWLINE_REGEXP);
 
+    // if there is a trailing new line then the last entry will be an empty
+    // string which we don't care about
+    if (trailingNewline) {
+      lines.pop();
+    }
+
     if (lines.length === 1 && !trailingNewline) {
       this.buffer.push(lines[0]!);
       return [];
diff --git a/tests/streaming.test.ts b/tests/streaming.test.ts
new file mode 100644
index 000000000..45cf6f6cd
--- /dev/null
+++ b/tests/streaming.test.ts
@@ -0,0 +1,42 @@
+import { LineDecoder } from 'openai/streaming';
+
+function decodeChunks(chunks: string[], decoder?: LineDecoder): string[] {
+  if (!decoder) {
+    decoder = new LineDecoder();
+  }
+
+  const lines = [];
+  for (const chunk of chunks) {
+    lines.push(...decoder.decode(chunk));
+  }
+
+  return lines;
+}
+
+describe('line decoder', () => {
+  test('basic', () => {
+    // baz is not included because the line hasn't ended yet
+    expect(decodeChunks(['foo', ' bar\nbaz'])).toEqual(['foo bar']);
+  });
+
+  test('basic with \\r', () => {
+    // baz is not included because the line hasn't ended yet
+    expect(decodeChunks(['foo', ' bar\r\nbaz'])).toEqual(['foo bar']);
+  });
+
+  test('trailing new lines', () => {
+    expect(decodeChunks(['foo', ' bar', 'baz\n', 'thing\n'])).toEqual(['foo barbaz', 'thing']);
+  });
+
+  test('trailing new lines with \\r', () => {
+    expect(decodeChunks(['foo', ' bar', 'baz\r\n', 'thing\r\n'])).toEqual(['foo barbaz', 'thing']);
+  });
+
+  test('escaped new lines', () => {
+    expect(decodeChunks(['foo', ' bar\\nbaz\n'])).toEqual(['foo bar\\nbaz']);
+  });
+
+  test('escaped new lines with \\r', () => {
+    expect(decodeChunks(['foo', ' bar\\r\\nbaz\n'])).toEqual(['foo bar\\r\\nbaz']);
+  });
+});