Skip to content

Commit

Permalink
test(csv): add grapheme length test (#5304)
Browse files Browse the repository at this point in the history
  • Loading branch information
timreichen committed Jul 4, 2024
1 parent 9d5d887 commit ae95651
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 8 deletions.
14 changes: 10 additions & 4 deletions csv/_io.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
// https://github.com/golang/go/blob/master/LICENSE
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

import { graphemeLength } from "./_shared.ts";

/** Options for {@linkcode parseRecord}. */
export interface ReadOptions {
/** Character which separates values.
Expand Down Expand Up @@ -94,7 +96,9 @@ export async function parseRecord(
if (!opt.lazyQuotes) {
const j = field.indexOf(quote);
if (j >= 0) {
const col = fullLine.length + j - line.length;
const col = graphemeLength(
fullLine.slice(0, fullLine.length - line.slice(j).length),
);
throw new ParseError(startLine + 1, lineIndex, col, ERR_BARE_QUOTE);
}
}
Expand Down Expand Up @@ -132,7 +136,9 @@ export async function parseRecord(
recordBuffer += quote;
} else {
// `"*` sequence (invalid non-escaped quote).
const col = fullLine.length - line.length - quoteLen;
const col = graphemeLength(
fullLine.slice(0, fullLine.length - line.length - quoteLen),
);
throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
}
} else if (line.length > 0 || !reader.isEOF()) {
Expand All @@ -145,7 +151,7 @@ export async function parseRecord(
if (r === null) {
// Abrupt end of file (EOF or error).
if (!opt.lazyQuotes) {
const col = fullLine.length;
const col = graphemeLength(fullLine);
throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
}
fieldIndexes.push(recordBuffer.length);
Expand All @@ -155,7 +161,7 @@ export async function parseRecord(
} else {
// Abrupt end of file (EOF or error).
if (!opt.lazyQuotes) {
const col = fullLine.length;
const col = graphemeLength(fullLine);
throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
}
fieldIndexes.push(recordBuffer.length);
Expand Down
15 changes: 15 additions & 0 deletions csv/_shared.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.

/**
 * Counts the Unicode code points in a string.
 *
 * Unlike `String.prototype.length`, which counts UTF-16 code units, a
 * surrogate pair (for example an emoji outside the Basic Multilingual Plane)
 * contributes one to the result rather than two. Despite the name, no
 * grapheme-cluster segmentation is performed: a base character followed by a
 * combining mark still counts as two.
 *
 * @example
 * ```ts
 * graphemeLength("🐱"); // 1
 * "🐱".length; // 2
 * ```
 *
 * @param s The string to measure.
 * @returns The number of code points in `s`.
 */
export function graphemeLength(s: string) {
  let codePoints = 0;
  // String iteration walks code points, so a surrogate pair is visited once.
  for (const _ of s) {
    codePoints += 1;
  }
  return codePoints;
}
13 changes: 9 additions & 4 deletions csv/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
type ReadOptions,
type RecordWithColumn,
} from "./_io.ts";
import { graphemeLength } from "./_shared.ts";

export { ParseError, type ParseResult, type RecordWithColumn };

Expand Down Expand Up @@ -108,7 +109,9 @@ class Parser {
if (!this.#options.lazyQuotes) {
const j = field.indexOf(quote);
if (j >= 0) {
const col = fullLine.length + j - line.length;
const col = graphemeLength(
fullLine.slice(0, fullLine.length - line.slice(j).length),
);
throw new ParseError(startLine + 1, lineIndex, col, ERR_BARE_QUOTE);
}
}
Expand Down Expand Up @@ -146,7 +149,9 @@ class Parser {
recordBuffer += quote;
} else {
// `"*` sequence (invalid non-escaped quote).
const col = fullLine.length - line.length - quoteLen;
const col = graphemeLength(
fullLine.slice(0, fullLine.length - line.length - quoteLen),
);
throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
}
} else if (line.length > 0 || !(this.#isEOF())) {
Expand All @@ -159,7 +164,7 @@ class Parser {
if (r === null) {
// Abrupt end of file (EOF or error).
if (!this.#options.lazyQuotes) {
const col = fullLine.length;
const col = graphemeLength(fullLine);
throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
}
fieldIndexes.push(recordBuffer.length);
Expand All @@ -169,7 +174,7 @@ class Parser {
} else {
// Abrupt end of file (EOF or error).
if (!this.#options.lazyQuotes) {
const col = fullLine.length;
const col = graphemeLength(fullLine);
throw new ParseError(startLine + 1, lineIndex, col, ERR_QUOTE);
}
fieldIndexes.push(recordBuffer.length);
Expand Down
11 changes: 11 additions & 0 deletions csv/parse_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,17 @@ Deno.test({
);
},
});
// Checks that the column in a ParseError message is counted in code points
// rather than UTF-16 code units.
await t.step({
name: "error column grapheme number",
fn() {
// The bare quote follows `a`, `,`, `b`, `,` and the cat emoji: five code
// points, but six UTF-16 code units because the emoji is a surrogate pair.
const input = `a,b,🐱"`;
assertThrows(
() => parse(input),
ParseError,
'parse error on line 1, column 5: bare " in non-quoted-field',
);
},
});
await t.step({
name: "TrimQuote",
fn() {
Expand Down

0 comments on commit ae95651

Please sign in to comment.