Skip to content

Commit

Permalink
Only skip first n lines in the first chunk and don't take the first line as header (mholt#1045)
Browse files Browse the repository at this point in the history
  • Loading branch information
jkruke committed Mar 7, 2024
1 parent a39383e commit 28feeda
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 19 deletions.
21 changes: 5 additions & 16 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,11 @@ License: MIT
this.parseChunk = function(chunk, isFakeChunk)
{
// First chunk pre-processing
const skipFirstNLines = parseInt(this._config.skipFirstNLines) || 0;
if (this.isFirstChunk && skipFirstNLines > 0) {
const splitChunk = chunk.split('\n');
chunk = [...splitChunk.slice(skipFirstNLines)].join('\n');
}
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk))
{
var modifiedChunk = this._config.beforeFirstChunk(chunk);
Expand All @@ -522,22 +527,6 @@ License: MIT

// Rejoin the line we likely just split in two by chunking the file
var aggregate = this._partialLine + chunk;
this._pendingSkip = parseInt(this._config.skipFirstNLines) || 0;
this._skipHeader = 0;
if (this._config.header) {
this._skipHeader++;
}
if (this._pendingSkip > 0) {
var splitChunk = aggregate.split('\n');
var currentChunkLength = splitChunk.length;
if (currentChunkLength <= this._pendingSkip) {
aggregate = this._partialLine;
}
else{
aggregate = this._partialLine + [...splitChunk.slice(0, this._skipHeader), ...splitChunk.slice(this._skipHeader + this._pendingSkip)].join('\n');
}
this._pendingSkip -= currentChunkLength;
}
this._partialLine = '';
var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

Expand Down
6 changes: 3 additions & 3 deletions tests/test-cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -1585,11 +1585,11 @@ var PARSE_TESTS = [
}
},
{
description: "Skip First N number of lines , with header and 3 rows",
input: 'a,b,c,d\n1,2,3,4\n4,5,6,7',
description: "Skip First N number of lines , with header and 2 rows",
input: 'to-be-ignored\na,b,c,d\n1,2,3,4',
config: { header: true, skipFirstNLines: 1 },
expected: {
data: [{a: '4', b: '5', c: '6', d: '7'}],
data: [{a: '1', b: '2', c: '3', d: '4'}],
errors: []
}
},
Expand Down

0 comments on commit 28feeda

Please sign in to comment.