diff --git a/README.md b/README.md index b9a6e4e..0c818fb 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ var concat = require('concat-stream'); var removeBOM = require('remove-bom-stream'); fs.createReadStream('utf8-file-with-bom.txt') - .pipe(removeBOM()) + .pipe(removeBOM('utf-8')) .pipe( concat(function (result) { // result won't have a BOM @@ -28,9 +28,9 @@ fs.createReadStream('utf8-file-with-bom.txt') ## API -### `removeBOM()` +### `removeBOM(encoding)` -Returns a `through2` stream that will remove a BOM, given the data is a UTF8 Buffer with a BOM at the beginning. If the data is not UTF8 or does not have a BOM, the data is not changed and this becomes a normal passthrough stream. +Returns a `through2` stream that removes a BOM when the `encoding` argument is `'utf-8'` (or `'utf8'`; the comparison is case-insensitive) and the data is a UTF-8 Buffer with a BOM at the beginning. If `encoding` is anything else, or the data does not start with a BOM, the data is not changed and this becomes a normal passthrough stream. ## License diff --git a/index.js b/index.js index 6047d4b..7a707b5 100644 --- a/index.js +++ b/index.js @@ -1,51 +1,56 @@ 'use strict'; var through = require('through2'); -var removeBom = require('remove-bom-buffer'); +var TextDecoder = require('util').TextDecoder; -function removeBomStream() { - var state = 0; // 0:Not removed, -1:In removing, 1:Already removed - var buffer = Buffer.alloc(0); +var BOM = '\ufeff'; - return through(onChunk, onFlush); +function removeBomStream(encoding) { + encoding = (encoding || '').toLowerCase(); + var isUTF8 = (encoding === 'utf-8' || encoding === 'utf8'); - function removeAndCleanup(data) { - state = 1; // Already removed + // Needed due to https://github.com/nodejs/node/pull/42779 + if (!isUTF8) { + return through(); + } - buffer = null; + // Only used if encoding is UTF-8 + var decoder = new TextDecoder('utf-8', { ignoreBOM: false }); - return removeBom(data); - } + var state = 0; // 0:Not removed, -1:In removing, 1:Already removed + + return 
through(onChunk); - function onChunk(data, enc, cb) { + function onChunk(data, _, cb) { if (state === 1) { - return cb(null, data); + cb(null, data); + return; } - if (state === 0 /* Not removed */ && data.length >= 7) { - return cb(null, removeAndCleanup(data)); - } + try { + state = -1; - state = -1; // In removing + var chunk = decoder.decode(data, { stream: true }); - var bufferLength = buffer.length; - var chunkLength = data.length; - var totalLength = bufferLength + chunkLength; + // The first time we have data after a decode, it should have already removed the BOM + if (chunk !== '') { + chunk += decoder.decode(); // end of stream mode and clear inner buffer. - buffer = Buffer.concat([buffer, data], totalLength); + // Node<=v12, TextDecoder#decode returns a BOM if it receives a BOM separately. + // Ref https://github.com/nodejs/node/pull/30132 + if (chunk !== BOM) { + state = 1; + var buffer = Buffer.from(chunk, 'utf-8'); - if (totalLength >= 7) { - return cb(null, removeAndCleanup(buffer)); - } - cb(); - } + cb(null, buffer); + return; + } + } - function onFlush(cb) { - if (state === 2 /* Already removed */ || !buffer) { - return cb(); + cb(); + } catch (err) { + cb(err); } - - cb(null, removeAndCleanup(buffer)); } } diff --git a/package.json b/package.json index ca167de..be77b04 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,6 @@ "test": "nyc mocha --async-only" }, "dependencies": { - "remove-bom-buffer": "^3.0.0", "through2": "^4.0.2" }, "devDependencies": { diff --git a/test/index.js b/test/index.js index 5789090..b8c9be7 100644 --- a/test/index.js +++ b/test/index.js @@ -24,7 +24,7 @@ describe('removeBomStream', function () { } pipe( - [fs.createReadStream(filepath), removeBomStream(), concat(assert)], + [fs.createReadStream(filepath), removeBomStream('utf-8'), concat(assert)], done ); }); @@ -33,7 +33,7 @@ describe('removeBomStream', function () { var filepath = path.join(__dirname, './fixtures/test.txt'); var fileContent = 
fs.readFileSync(filepath, 'utf-8'); - var rmBom = removeBomStream(); + var rmBom = removeBomStream('utf8'); var output = ''; rmBom.on('data', function (d) { output += d.toString(); @@ -55,7 +55,7 @@ describe('removeBomStream', function () { } pipe( - [fs.createReadStream(filepath), removeBomStream(), concat(assert)], + [fs.createReadStream(filepath), removeBomStream('UTF-8'), concat(assert)], done ); }); @@ -73,7 +73,7 @@ describe('removeBomStream', function () { [ fs.createReadStream(filepath), chunker(1), - removeBomStream(), + removeBomStream('UTF8'), concat(assert), ], done @@ -92,7 +92,7 @@ describe('removeBomStream', function () { } pipe( - [fs.createReadStream(filepath), removeBomStream(), concat(assert)], + [fs.createReadStream(filepath), removeBomStream('UTF-8'), concat(assert)], done ); }); @@ -101,7 +101,7 @@ describe('removeBomStream', function () { var filepath = path.join(__dirname, './fixtures/bom-utf8.txt'); var fileContent = fs.readFileSync(filepath, 'utf-8'); - var rmBom = removeBomStream(); + var rmBom = removeBomStream('utf-8'); var output = ''; rmBom.on('data', function (d) { output += d.toString(); @@ -123,7 +123,7 @@ describe('removeBomStream', function () { } pipe( - [fs.createReadStream(filepath), removeBomStream(), concat(assert)], + [fs.createReadStream(filepath), removeBomStream('utf-16be'), concat(assert)], done ); }); @@ -138,7 +138,7 @@ describe('removeBomStream', function () { } pipe( - [fs.createReadStream(filepath), removeBomStream(), concat(assert)], + [fs.createReadStream(filepath), removeBomStream('utf-16be'), concat(assert)], done ); }); @@ -153,7 +153,7 @@ describe('removeBomStream', function () { } pipe( - [fs.createReadStream(filepath), removeBomStream(), concat(assert)], + [fs.createReadStream(filepath), removeBomStream('utf-16le'), concat(assert)], done ); });