Skip to content

Commit

Permalink
Refactor header renaming logic to address #1052, #1007 (#1058)
Browse files Browse the repository at this point in the history
And updated test to match the improved renaming logic.
  • Loading branch information
augustmarowski authored Sep 12, 2024
1 parent cf4bded commit 4af6882
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 52 deletions.
93 changes: 43 additions & 50 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -1480,61 +1480,14 @@ License: MIT
if (!input)
return returnable();

// Rename headers if there are duplicates
var firstLine;
if (config.header && !baseIndex)
{
firstLine = input.split(newline)[0];
var headers = firstLine.split(delim);
var separator = '_';
var headerMap = new Set();
var headerCount = {};
var duplicateHeaders = false;

// Using old-style 'for' loop to avoid prototype pollution that would be picked up with 'var j in headers'
for (var j = 0; j < headers.length; j++) {
var header = headers[j];
if (isFunction(config.transformHeader))
header = config.transformHeader(header, j);
var headerName = header;

var count = headerCount[header] || 0;
if (count > 0) {
duplicateHeaders = true;
headerName = header + separator + count;
// Initialise the variable if it hasn't been.
if (renamedHeaders === null) {
renamedHeaders = {};
}
}
headerCount[header] = count + 1;
// In case it already exists, we add more separators
while (headerMap.has(headerName)) {
headerName = headerName + separator + count;
}
headerMap.add(headerName);
if (count > 0) {
renamedHeaders[headerName] = header;
}
}
if (duplicateHeaders) {
var editedInput = input.split(newline);
editedInput[0] = Array.from(headerMap).join(delim);
input = editedInput.join(newline);
}
}
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
{
var rows = input.split(newline);
for (var i = 0; i < rows.length; i++)
{
row = rows[i];
// use firstline as row length may be changed due to duplicated headers
if (i === 0 && firstLine !== undefined) {
cursor += firstLine.length;
}else{
cursor += row.length;
}
cursor += row.length;

if (i !== rows.length - 1)
cursor += newline.length;
else if (ignoreLastRow)
Expand Down Expand Up @@ -1729,7 +1682,6 @@ License: MIT
break;
}


return finish();


Expand Down Expand Up @@ -1789,6 +1741,47 @@ License: MIT
/** Returns an object with the results, errors, and meta. */
function returnable(stopped)
{
if (config.header && !baseIndex && data.length)
{
const result = data[0];
const headerCount = {}; // To track the count of each base header
const usedHeaders = new Set(result); // To track used headers and avoid duplicates
let duplicateHeaders = false;

for (let i = 0; i < result.length; i++) {
let header = result[i];
if (isFunction(config.transformHeader))
header = config.transformHeader(header, i);

if (!headerCount[header]) {
headerCount[header] = 1;
result[i] = header;
} else {
let newHeader;
let suffixCount = headerCount[header];

// Find a unique new header
do {
newHeader = `${header}_${suffixCount}`;
suffixCount++;
} while (usedHeaders.has(newHeader));

usedHeaders.add(newHeader); // Mark this new Header as used
result[i] = newHeader;
headerCount[header]++;
duplicateHeaders = true;
if (renamedHeaders === null) {
renamedHeaders = {};
}
renamedHeaders[newHeader] = header;
}

usedHeaders.add(header); // Ensure the original header is marked as used
}
if (duplicateHeaders) {
console.warn('Duplicate headers found and renamed.');
}
}
return {
data: data,
errors: errors,
Expand Down
4 changes: 2 additions & 2 deletions tests/test-cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -618,10 +618,10 @@ var CORE_PARSER_TESTS = [
input: 'c,c,c,c_1\n1,2,3,4',
config: { header: true },
expected: {
data: [['c', 'c_1', 'c_2', 'c_1_0'], ['1', '2', '3', '4']],
data: [['c', 'c_2', 'c_3', 'c_1'], ['1', '2', '3', '4']],
errors: [],
meta: {
renamedHeaders: {c_1: 'c', c_2: 'c'},
renamedHeaders: {c_2: 'c', c_3: 'c'},
cursor: 17
}
}
Expand Down

0 comments on commit 4af6882

Please sign in to comment.