From e2d570e77cc2720b2c256ec099f3e7e424c39e35 Mon Sep 17 00:00:00 2001 From: bhuvanesh <56244773+bhuvaneshwararaja@users.noreply.github.com> Date: Mon, 9 Oct 2023 19:44:28 +0530 Subject: [PATCH] Allow to skip first lines #738 (#1021) Co-authored-by: Bhuvaneshwara Raja --- docs/docs.html | 11 ++++++++++- papaparse.js | 19 +++++++++++++++++-- player/player.html | 1 + player/player.js | 1 + tests/test-cases.js | 36 ++++++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 3 deletions(-) diff --git a/docs/docs.html b/docs/docs.html index 49095b17..b9292fad 100644 --- a/docs/docs.html +++ b/docs/docs.html @@ -450,7 +450,8 @@
Default Config With All Options
beforeFirstChunk: undefined, withCredentials: undefined, transform: undefined, - delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP] + delimitersToGuess: [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP], + skipFirstNLines: 0 }
@@ -682,6 +683,14 @@
Config Options
An array of delimiters to guess from if the delimiter option is not set. + + + skipFirstNLines + + + The number of lines to skip at the start of a CSV file when converting it to JSON + + diff --git a/papaparse.js b/papaparse.js index b9e41d08..4bc3dc37 100755 --- a/papaparse.js +++ b/papaparse.js @@ -486,6 +486,7 @@ License: MIT } } + /** ChunkStreamer is the base prototype for various streamer implementations. */ function ChunkStreamer(config) { @@ -521,8 +522,23 @@ License: MIT // Rejoin the line we likely just split in two by chunking the file var aggregate = this._partialLine + chunk; + this._pendingSkip = parseInt(this._config.skipFirstNLines) || 0; + this._skipHeader = 0; + if (this._config.header) { + this._skipHeader++; + } + if (this._pendingSkip > 0) { + var splitChunk = aggregate.split('\n'); + var currentChunkLength = splitChunk.length; + if (currentChunkLength <= this._pendingSkip) { + aggregate = this._partialLine; + } + else{ + aggregate = this._partialLine + [...splitChunk.slice(0, this._skipHeader), ...splitChunk.slice(this._skipHeader + this._pendingSkip)].join('\n'); + } + this._pendingSkip -= currentChunkLength; + } this._partialLine = ''; - var results = this._handle.parse(aggregate, this._baseIndex, !this._finished); if (this._handle.paused() || this._handle.aborted()) { @@ -1931,7 +1947,6 @@ License: MIT { return function() { f.apply(self, arguments); }; } - function isFunction(func) { return typeof func === 'function'; diff --git a/player/player.html b/player/player.html index b90fe986..48015e35 100644 --- a/player/player.html +++ b/player/player.html @@ -24,6 +24,7 @@

Papa Parse Player

+ diff --git a/player/player.js b/player/player.js index 8150de64..f8b8e3ac 100644 --- a/player/player.js +++ b/player/player.js @@ -108,6 +108,7 @@ function buildConfig() skipEmptyLines: $('#skipEmptyLines').prop('checked'), chunk: $('#chunk').prop('checked') ? chunkFn : undefined, beforeFirstChunk: undefined, + skipFirstNLines: $('#skipFirstNLines').val() }; function getLineEnding() diff --git a/tests/test-cases.js b/tests/test-cases.js index da489e8c..68f367fc 100644 --- a/tests/test-cases.js +++ b/tests/test-cases.js @@ -1574,6 +1574,42 @@ var PARSE_TESTS = [ data: [['a', 'b', 'c\n'], ['d', 'e', 'f']], errors: [] } + }, + { + description: "Skip First N number of lines , with header and 2 rows", + input: 'a,b,c,d\n1,2,3,4', + config: { header: true, skipFirstNLines: 1 }, + expected: { + data: [], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header and 3 rows", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: true, skipFirstNLines: 1 }, + expected: { + data: [{a: '4', b: '5', c: '6', d: '7'}], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: false, skipFirstNLines: 1 }, + expected: { + data: [['1','2','3','4'],['4','5','6','7']], + errors: [] + } + }, + { + description: "Skip First N number of lines , with header false and skipFirstNLines as negative value", + input: 'a,b,c,d\n1,2,3,4\n4,5,6,7', + config: { header: false, skipFirstNLines: -2 }, + expected: { + data: [['a','b','c','d'],['1','2','3','4'],['4','5','6','7']], + errors: [] + } } ];