Skip to content

Commit

Permalink
Merge pull request #308 from alexburley/master
Browse files Browse the repository at this point in the history
Charset detection not default as utf-8 #306
  • Loading branch information
tomas committed Apr 9, 2020
2 parents 23a4cbf + 7eceb5a commit 819240f
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 8 deletions.
2 changes: 1 addition & 1 deletion lib/decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ StreamDecoder.prototype._transform = function(chunk, encoding, done) {
var res, found;

// try get charset from chunk, just once
if (this.charset == 'iso-8859-1' && !this.parsed_chunk) {
if (this.charset == 'utf8' && !this.parsed_chunk) {
this.parsed_chunk = true;

var matches = regex.exec(chunk.toString());
Expand Down
5 changes: 2 additions & 3 deletions lib/needle.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ function keys_by_type(type) {
function parse_content_type(header) {
if (!header || header === '') return {};

var found, charset = 'iso-8859-1', arr = header.split(';');
var found, charset = 'utf8', arr = header.split(';');

if (arr.length > 1 && (found = arr[1].match(/charset=(.+)/)))
charset = found[1];
Expand Down Expand Up @@ -589,10 +589,9 @@ Needle.prototype.send_request = function(count, method, uri, config, post_data,
// If we're not parsing, and unless decoding was disabled, we'll run text
// bodies through the iconv-lite based decoder so they end up as UTF-8.
} else if (text_response && config.decode_response
&& mime.charset && !mime.charset.match(/utf-?8$/i)) {
&& mime.charset) {
pipeline.push(decoder(mime.charset));
}

// And `out` is the stream we finally push the decoded/parsed output to.
pipeline.push(out);

Expand Down
41 changes: 41 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"JSONStream": "^1.3.5",
"jschardet": "^1.6.0",
"mocha": "^5.2.0",
"nock": "^12.0.3",
"q": "^1.5.1",
"should": "^13.2.3",
"sinon": "^2.3.0",
Expand Down
36 changes: 32 additions & 4 deletions test/decoder_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ var should = require('should'),
needle = require('./../'),
Q = require('q'),
chardet = require('jschardet');
nock = require('nock')

describe('character encoding', function() {

var url;
this.timeout(5000);

describe('test A', function() {
describe('Given content-type: "text/html; charset=EUC-JP"', function() {

before(function() {
url = 'http://www.nina.jp/server/slackware/webapp/tomcat_charset.html';
Expand Down Expand Up @@ -46,7 +47,7 @@ describe('character encoding', function() {

})

describe('test B', function() {
describe('Given content-type: "text/html but file is charset: gb2312', function() {

it('encodes to UTF-8', function(done) {

Expand All @@ -71,16 +72,43 @@ describe('character encoding', function() {
chardet.detect(bodies[1]).encoding,
]

// We wanted to decode our first stream.
// We wanted to decode our first stream as specified by options
charsets[0].should.equal('ascii');
bodies[0].indexOf('全球中文网站前二十强').should.not.equal(-1);

// But not our second stream.
// But not our second stream
charsets[1].should.equal('windows-1252');
bodies[1].indexOf('全球中文网站前二十强').should.equal(-1);

done();
});
})
})

// Response served with a bare "text/html" content type (no charset parameter).
// The endpoint is mocked with nock, so no real network request is made.
describe('Given content-type: "text/html"', function () {
  var hungarianUrl = 'https://some.domain.com';

  // A nock interceptor is consumed by the request it matches (unless
  // persisted), so a fresh one is registered before every test.
  beforeEach(function () {
    nock(hungarianUrl)
      .get('/')
      .reply(200, 'Magyarországi Fióktelepe', {
        'content-type': 'text/html',
      });
  })

  describe('with decode = false', function () {

    it('decodes by default to utf-8', function (done) {

      needle.get(hungarianUrl, { decode: false }, function (err, resp) {
        // Report request failures to mocha instead of crashing on `resp` below.
        if (err) return done(err);

        // should.js type assertions are methods: without the call, nothing is asserted.
        resp.body.should.be.a.String();
        chardet.detect(resp.body).encoding.should.eql('ISO-8859-2');
        resp.body.should.eql('Magyarországi Fióktelepe')
        done();
      })

    })

  })

})
})

0 comments on commit 819240f

Please sign in to comment.