From d79e36d988839551dcefdea878eb5fb7eaa19dea Mon Sep 17 00:00:00 2001 From: spruehstuhl Date: Sun, 19 Apr 2015 16:57:40 +0200 Subject: [PATCH] Use StringDecoder for Buffers in WritableStream http://stackoverflow.com/questions/12121775/convert-streamed-buffers-to-utf8-string --- lib/WritableStream.js | 8 ++++++-- test/unicode.js | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 test/unicode.js diff --git a/lib/WritableStream.js b/lib/WritableStream.js index 1b60bb991..9868f8a88 100644 --- a/lib/WritableStream.js +++ b/lib/WritableStream.js @@ -1,21 +1,25 @@ module.exports = Stream; var Parser = require("./Parser.js"), - WritableStream = require("stream").Writable || require("readable-stream").Writable; + WritableStream = require("stream").Writable || require("readable-stream").Writable, + StringDecoder = require("string_decoder").StringDecoder, + Buffer = require("buffer").Buffer; function Stream(cbs, options){ var parser = this._parser = new Parser(cbs, options); + var decoder = this._decoder = new StringDecoder(); WritableStream.call(this, {decodeStrings: false}); this.once("finish", function(){ - parser.end(); + parser.end(decoder.end()); }); } require("inherits")(Stream, WritableStream); WritableStream.prototype._write = function(chunk, encoding, cb){ + if(chunk instanceof Buffer) chunk = this._decoder.write(chunk); this._parser.write(chunk); cb(); }; \ No newline at end of file diff --git a/test/unicode.js b/test/unicode.js new file mode 100644 index 000000000..602b4ca7a --- /dev/null +++ b/test/unicode.js @@ -0,0 +1,21 @@ +var htmlparser2 = require(".."), + assert = require("assert"); + +describe("WritableStream", function(){ + + it("should decode fragmented unicode characters", function(){ + var processed = false; + var stream = new htmlparser2.WritableStream({ + ontext: function(text){ + assert.equal(text, "€"); + processed = true; + } + }); + + stream.write(new Buffer([0xE2, 0x82])); + stream.write(new Buffer([0xAC])); + stream.end(); + + assert(processed); + }); +}); \ No newline at end of file