From fae9daf4c4c670b4e5a27be8e67597ec372bae79 Mon Sep 17 00:00:00 2001 From: shields Date: Thu, 27 May 2021 01:03:57 +0900 Subject: [PATCH 1/2] Better handling of Content-Encoding decompression 1. Net::HTTP should always decompress, if possible. 2. HTTParty should decompress "br" (Brotli) and "compress" (LZW) if the required gems are present. 3. Add :skip_decompression option to disable both Net::HTTP and HTTParty decompression. 4. Add "HTTP Compression" section to docs/README.md --- docs/README.md | 65 +++++++++++++ lib/httparty.rb | 17 ++++ lib/httparty/decompressor.rb | 92 ++++++++++++++++++ lib/httparty/parser.rb | 8 +- lib/httparty/request.rb | 29 +++++- spec/httparty/decompressor_spec.rb | 135 ++++++++++++++++++++++++++ spec/httparty/request_spec.rb | 148 ++++++++++++++++++++++++++--- spec/httparty_spec.rb | 1 + 8 files changed, 474 insertions(+), 21 deletions(-) create mode 100644 lib/httparty/decompressor.rb create mode 100644 spec/httparty/decompressor_spec.rb diff --git a/docs/README.md b/docs/README.md index c6d57d4e..d28ee947 100644 --- a/docs/README.md +++ b/docs/README.md @@ -104,3 +104,68 @@ class Client end end ``` + +### HTTP Compression + +The `Accept-Encoding` request header and `Content-Encoding` response header +are used to control compression (gzip, etc.) over the wire. Refer to +[RFC-2616](https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) for details. +(For clarity: these headers are **not** used for character encoding i.e. `utf-8` +which is specified in the `Accept` and `Content-Type` headers.) + +Unless you have specific requirements otherwise, we recommend to **not** set +set the `Accept-Encoding` header on HTTParty requests. In this case, `Net::HTTP` +will set a sensible default compression scheme and automatically decompress the response. + +If you explicitly set `Accept-Encoding`, there be dragons: + +* If the HTTP response `Content-Encoding` received on the wire is `gzip` or `deflate`, + `Net::HTTP` will automatically decompress it, and will omit `Content-Encoding` + from your `HTTParty::Response` headers. + +* For encodings `br` (Brotli) or `compress` (LZW), HTTParty will automatically + decompress if you include the `brotli` or `ruby-lzws` gems respectively into your project. + **Warning:** Support for these encodings is experimental and not fully battle-tested. + Similar to above, if decompression succeeds, `Content-Encoding` will be omitted + from your `HTTParty::Response` headers. + +* For other encodings, `HTTParty::Response#body` will return the raw uncompressed byte string, + and you'll need to inspect the `Content-Encoding` response header and decompress it yourself. + In this case, `HTTParty::Response#parsed_response` will be `nil`. + +* Lastly, you may use the `skip_decompression` option to disable all automatic decompression + and always get `HTTParty::Response#body` in its raw form along with the `Content-Encoding` header. + +```ruby +# Accept-Encoding=gzip,deflate can be safely assumed to be auto-decompressed + +res = HTTParty.get('https://example.com/test.json', headers: { 'Accept-Encoding' => 'gzip,deflate,identity' }) +JSON.parse(res.body) # safe + + +# Accept-Encoding=br,compress requires third-party gems + +require 'brotli' +require 'lzws' +res = HTTParty.get('https://example.com/test.json', headers: { 'Accept-Encoding' => 'br,compress' }) +JSON.parse(res.body) + + +# Accept-Encoding=* may return unhandled Content-Encoding + +res = HTTParty.get('https://example.com/test.json', headers: { 'Accept-Encoding' => '*' }) +encoding = res.headers['Content-Encoding'] +if encoding +JSON.parse(your_decompression_handling(res.body, encoding)) +else +# Content-Encoding not present implies decompressed +JSON.parse(res.body) +end + + +# Gimme the raw data! + +res = HTTParty.get('https://example.com/test.json', skip_decompression: true) +encoding = res.headers['Content-Encoding'] +JSON.parse(your_decompression_handling(res.body, encoding)) +``` diff --git a/lib/httparty.rb b/lib/httparty.rb index 2e9a5ff7..37e174f2 100644 --- a/lib/httparty.rb +++ b/lib/httparty.rb @@ -18,6 +18,7 @@ require 'httparty/logger/logger' require 'httparty/request/body' require 'httparty/response_fragment' +require 'httparty/decompressor' require 'httparty/text_encoder' require 'httparty/headers_processor' @@ -401,6 +402,22 @@ def ssl_version(version) default_options[:ssl_version] = version end + # Deactivate automatic decompression of the response body. + # This will require you to explicitly handle body decompression + # by inspecting the Content-Encoding response header. + # + # Refer to docs/README.md "HTTP Compression" section for + # further details. + # + # @example + # class Foo + # include HTTParty + # skip_decompression + # end + def skip_decompression(value = true) + default_options[:skip_decompression] = !!value + end + # Allows setting of SSL ciphers to use. This only works in Ruby 1.9+. # You can get a list of valid specific ciphers from OpenSSL::Cipher.ciphers. # You also can specify a cipher suite here, listed here at openssl.org: diff --git a/lib/httparty/decompressor.rb b/lib/httparty/decompressor.rb new file mode 100644 index 00000000..bfa0de06 --- /dev/null +++ b/lib/httparty/decompressor.rb @@ -0,0 +1,92 @@ +# frozen_string_literal: true + +module HTTParty + # Decompresses the response body based on the Content-Encoding header. + # + # Net::HTTP automatically decompresses Content-Encoding values "gzip" and "deflate". + # This class will handle "br" (Brotli) and "compress" (LZW) if the requisite + # gems are installed. Otherwise, it returns nil if the body data cannot be + # decompressed. + # + # @abstract Read the HTTP Compression section for more information. + class Decompressor + + # "gzip" and "deflate" are handled by Net::HTTP + # hence they do not need to be handled by HTTParty + SupportedEncodings = { + 'none' => :none, + 'identity' => :none, + 'br' => :brotli, + 'compress' => :lzw + }.freeze + + # The response body of the request + # @return [String] + attr_reader :body + + # The Content-Encoding algorithm used to encode the body + # @return [Symbol] e.g. :gzip + attr_reader :encoding + + # @param [String] body - the response body of the request + # @param [Symbol] encoding - the Content-Encoding algorithm used to encode the body + def initialize(body, encoding) + @body = body + @encoding = encoding + end + + # Perform decompression on the response body + # @return [String] the decompressed body + # @return [nil] when the response body is nil or cannot decompressed + def decompress + return nil if body.nil? + return body if encoding.nil? || encoding.strip.empty? + + if supports_encoding? + decompress_supported_encoding + else + nil + end + end + + protected + + def supports_encoding? + SupportedEncodings.keys.include?(encoding) + end + + def decompress_supported_encoding + method = SupportedEncodings[encoding] + if respond_to?(method, true) + send(method) + else + raise NotImplementedError, "#{self.class.name} has not implemented a decompression method for #{encoding.inspect} encoding." + end + end + + def none + body + end + + def brotli + return nil unless defined?(::Brotli) + begin + ::Brotli.inflate(body) + rescue StandardError + nil + end + end + + def lzw + begin + if defined?(::LZWS::String) + ::LZWS::String.decompress(body) + elsif defined?(::LZW::Simple) + ::LZW::Simple.new.decompress(body) + end + rescue StandardError + nil + end + end + end +end diff --git a/lib/httparty/parser.rb b/lib/httparty/parser.rb index 72316796..272b9fd2 100644 --- a/lib/httparty/parser.rb +++ b/lib/httparty/parser.rb @@ -144,9 +144,11 @@ def supports_format? end def parse_supported_format - send(format) - rescue NoMethodError => e - raise NotImplementedError, "#{self.class.name} has not implemented a parsing method for the #{format.inspect} format.", e.backtrace + if respond_to?(format, true) + send(format) + else + raise NotImplementedError, "#{self.class.name} has not implemented a parsing method for the #{format.inspect} format." + end end end end diff --git a/lib/httparty/request.rb b/lib/httparty/request.rb index a79c9a68..09b2d229 100644 --- a/lib/httparty/request.rb +++ b/lib/httparty/request.rb @@ -229,6 +229,8 @@ def setup_raw_request @raw_request.body = body.call end + @raw_request.instance_variable_set(:@decode_content, decompress_content?) + if options[:basic_auth] && send_authorization_header? @raw_request.basic_auth(username, password) @credentials_sent = true @@ -240,6 +242,10 @@ def digest_auth? !!options[:digest_auth] end + def decompress_content? + !options[:skip_decompression] + end + def response_unauthorized? !!last_response && last_response.code == '401' end @@ -271,7 +277,7 @@ def assume_utf16_is_big_endian options[:assume_utf16_is_big_endian] end - def handle_response(body, &block) + def handle_response(raw_body, &block) if response_redirects? options[:limit] -= 1 if options[:logger] @@ -292,9 +298,20 @@ def handle_response(body, &block) capture_cookies(last_response) perform(&block) else - body ||= last_response.body - body = body.nil? ? body : encode_text(body, last_response['content-type']) - Response.new(self, last_response, lambda { parse_response(body) }, body: body) + raw_body ||= last_response.body + + body = decompress(raw_body, last_response['content-encoding']) unless raw_body.nil? + + unless body.nil? + body = encode_text(body, last_response['content-type']) + + if decompress_content? + last_response.delete('content-encoding') + raw_body = body + end + end + + Response.new(self, last_response, lambda { parse_response(body) }, body: raw_body) end end @@ -370,6 +387,10 @@ def set_basic_auth_from_uri end end + def decompress(body, encoding) + Decompressor.new(body, encoding).decompress + end + def encode_text(text, content_type) TextEncoder.new( text, diff --git a/spec/httparty/decompressor_spec.rb b/spec/httparty/decompressor_spec.rb new file mode 100644 index 00000000..28872bf5 --- /dev/null +++ b/spec/httparty/decompressor_spec.rb @@ -0,0 +1,135 @@ +require 'spec_helper' + +RSpec.describe HTTParty::Decompressor do + describe '.SupportedEncodings' do + it 'returns a hash' do + expect(HTTParty::Decompressor::SupportedEncodings).to be_instance_of(Hash) + end + end + + describe '#decompress' do + let(:body) { 'body' } + let(:encoding) { 'none' } + let(:decompressor) { described_class.new(body, encoding) } + subject { decompressor.decompress } + + shared_examples 'returns nil' do + it { expect(subject).to be_nil } + end + + shared_examples 'returns the body' do + it { expect(subject).to eq 'body' } + end + + context 'when body is nil' do + let(:body) { nil } + it_behaves_like 'returns nil' + end + + context 'when body is blank' do + let(:body) { ' ' } + it { expect(subject).to eq ' ' } + end + + context 'when encoding is nil' do + let(:encoding) { nil } + it_behaves_like 'returns the body' + end + + context 'when encoding is blank' do + let(:encoding) { ' ' } + it_behaves_like 'returns the body' + end + + context 'when encoding is "none"' do + let(:encoding) { 'none' } + it_behaves_like 'returns the body' + end + + context 'when encoding is "identity"' do + let(:encoding) { 'identity' } + it_behaves_like 'returns the body' + end + + context 'when encoding is unsupported' do + let(:encoding) { 'invalid' } + it_behaves_like 'returns nil' + end + + context 'when encoding is "br"' do + let(:encoding) { 'br' } + + context 'when brotli gem not included' do + it_behaves_like 'returns nil' + end + + context 'when brotli included' do + before do + dbl = double('Brotli') + expect(dbl).to receive(:inflate).with('body').and_return('foobar') + stub_const('Brotli', dbl) + end + + it { expect(subject).to eq 'foobar' } + end + + context 'when brotli raises error' do + before do + dbl = double('brotli') + expect(dbl).to receive(:inflate).with('body') { raise RuntimeError.new('brotli error') } + stub_const('Brotli', dbl) + end + + it { expect(subject).to eq nil } + end + end + + context 'when encoding is "compress"' do + let(:encoding) { 'compress' } + + context 'when LZW gem not included' do + it_behaves_like 'returns nil' + end + + context 'when ruby-lzws included' do + before do + dbl = double('lzws') + expect(dbl).to receive(:decompress).with('body').and_return('foobar') + stub_const('LZWS::String', dbl) + end + + it { expect(subject).to eq 'foobar' } + end + + context 'when ruby-lzws raises error' do + before do + dbl = double('lzws') + expect(dbl).to receive(:decompress).with('body') { raise RuntimeError.new('brotli error') } + stub_const('LZWS::String', dbl) + end + + it { expect(subject).to eq nil } + end + + context 'when compress-lzw included' do + before do + dbl2 = double('lzw2') + dbl = double('lzw1', new: dbl2) + expect(dbl2).to receive(:decompress).with('body').and_return('foobar') + stub_const('LZW::Simple', dbl) + end + + it { expect(subject).to eq 'foobar' } + end + + context 'when compress-lzw raises error' do + before do + dbl2 = double('lzw2') + dbl = double('lzw1', new: dbl2) + expect(dbl2).to receive(:decompress).with('body') { raise RuntimeError.new('brotli error') } + stub_const('LZW::Simple', dbl) + end + end + end + end +end diff --git a/spec/httparty/request_spec.rb b/spec/httparty/request_spec.rb index 4b4c237a..c22b728c 100644 --- a/spec/httparty/request_spec.rb +++ b/spec/httparty/request_spec.rb @@ -505,6 +505,55 @@ expect(@request.perform.headers).to eq({ "key" => ["value"] }) end + describe 'decompression' do + it 'should remove the Content-Encoding header if uncompressed' do + @request.options[:format] = :html + response = stub_response 'Content' + response.initialize_http_header('Content-Encoding' => 'none') + + resp = @request.perform + expect(resp.body).to eq('Content') + expect(resp.headers).to eq({}) + expect(resp.parsed_response).to eq('Content') + end + + it 'should decompress the body and remove the Content-Encoding header' do + @request.options[:format] = :html + stub_const('Brotli', double('Brotli', inflate: 'foobar')) + response = stub_response 'Content' + response.initialize_http_header('Content-Encoding' => 'br') + + resp = @request.perform + expect(resp.body).to eq('foobar') + expect(resp.headers).to eq({}) + expect(resp.parsed_response).to eq('foobar') + end + + it 'should not decompress the body if the :skip_decompression option is set' do + @request.options[:format] = :html + @request.options[:skip_decompression] = true + stub_const('Brotli', double('Brotli', inflate: 'foobar')) + response = stub_response 'Content' + response.initialize_http_header('Content-Encoding' => 'br') + + resp = @request.perform + expect(resp.body).to eq('Content') + expect(resp.headers).to eq({ 'Content-Encoding' => 'br' }) + expect(resp.parsed_response).to eq('foobar') + end + + it 'should not decompress unrecognized Content-Encoding' do + @request.options[:format] = :html + response = stub_response 'Content' + response.initialize_http_header('Content-Encoding' => 'bad') + + resp = @request.perform + expect(resp.body).to eq('Content') + expect(resp.headers).to eq({ 'Content-Encoding' => 'bad' }) + expect(resp.parsed_response).to eq(nil) + end + end + if "".respond_to?(:encoding) context 'when body has ascii-8bit encoding' do let(:response) { stub_response "Content".force_encoding('ascii-8bit') } @@ -1199,8 +1248,6 @@ expect(@request.perform.parsed_response).to eq({"hash" => {"foo" => "bar"}}) end - - it "should keep track of cookies between redirects" do @redirect['Set-Cookie'] = 'foo=bar; name=value; HTTPOnly' @request.perform @@ -1368,23 +1415,96 @@ end end - context 'with Accept-Encoding header' do - it 'should disable content decoding if present' do - request = HTTParty::Request.new(Net::HTTP::Get, 'http://api.foo.com/v1', headers:{'Accept-Encoding' => 'custom'}) + context 'Net::HTTP decompression' do + + subject(:raw_request) do + request = HTTParty::Request.new(Net::HTTP::Get, 'http://api.foo.com/v1', headers.merge(options)) request.send(:setup_raw_request) - expect(request.instance_variable_get(:@raw_request).decode_content).to eq(false) + request.instance_variable_get(:@raw_request) end - it 'should disable content decoding if present and lowercase' do - request = HTTParty::Request.new(Net::HTTP::Get, 'http://api.foo.com/v1', headers:{'accept-encoding' => 'custom'}) - request.send(:setup_raw_request) - expect(request.instance_variable_get(:@raw_request).decode_content).to eq(false) + shared_examples 'sets custom Accept-Encoding' do + it { expect(subject['Accept-Encoding']).to eq('custom') } end - it 'should disable content decoding if present' do - request = HTTParty::Request.new(Net::HTTP::Get, 'http://api.foo.com/v1') - request.send(:setup_raw_request) - expect(request.instance_variable_get(:@raw_request).decode_content).to eq(true) + shared_examples 'sets default Accept-Encoding' do + it { expect(subject['Accept-Encoding']).to eq('gzip;q=1.0,deflate;q=0.6,identity;q=0.3') } + end + + shared_examples 'enables Net::HTTP decompression' do + it { expect(subject.decode_content).to eq(true) } + end + + shared_examples 'disables Net::HTTP decompression' do + it { expect(subject.decode_content).to eq(false) } + end + + context 'with skip_decompression false (default)' do + let(:options) { {} } + + context 'with Accept-Encoding specified' do + let(:headers) { { headers: { 'Accept-Encoding' => 'custom' } } } + it_behaves_like 'sets custom Accept-Encoding' + it_behaves_like 'enables Net::HTTP decompression' + end + + context 'with accept-encoding (lowercase) specified' do + let(:headers) { { headers: { 'accept-encoding' => 'custom' } } } + it_behaves_like 'sets custom Accept-Encoding' + it_behaves_like 'enables Net::HTTP decompression' + end + + context 'with Accept-Encoding and other headers specified' do + let(:headers) { { headers: { 'Accept-Encoding' => 'custom', 'Content-Type' => 'application/json' } } } + it_behaves_like 'sets custom Accept-Encoding' + it_behaves_like 'enables Net::HTTP decompression' + end + + context 'with other headers specified' do + let(:headers) { { 'Content-Type' => 'application/json' } } + it_behaves_like 'sets default Accept-Encoding' + it_behaves_like 'enables Net::HTTP decompression' + end + + context 'with no headers specified' do + let(:headers) { {} } + it_behaves_like 'sets default Accept-Encoding' + it_behaves_like 'enables Net::HTTP decompression' + end + end + + context 'with skip_decompression true' do + let(:options) { { skip_decompression: true } } + + context 'with Accept-Encoding specified' do + let(:headers) { { headers: { 'Accept-Encoding' => 'custom' } } } + it_behaves_like 'sets custom Accept-Encoding' + it_behaves_like 'disables Net::HTTP decompression' + end + + context 'with accept-encoding (lowercase) specified' do + let(:headers) { { headers: { 'accept-encoding' => 'custom' } } } + it_behaves_like 'sets custom Accept-Encoding' + it_behaves_like 'disables Net::HTTP decompression' + end + + context 'with Accept-Encoding and other headers specified' do + let(:headers) { { headers: { 'Accept-Encoding' => 'custom', 'Content-Type' => 'application/json' } } } + it_behaves_like 'sets custom Accept-Encoding' + it_behaves_like 'disables Net::HTTP decompression' + end + + context 'with other headers specified' do + let(:headers) { { 'Content-Type' => 'application/json' } } + it_behaves_like 'sets default Accept-Encoding' + it_behaves_like 'disables Net::HTTP decompression' + end + + context 'with no headers specified' do + let(:headers) { {} } + it_behaves_like 'sets default Accept-Encoding' + it_behaves_like 'disables Net::HTTP decompression' + end end end end diff --git a/spec/httparty_spec.rb b/spec/httparty_spec.rb index 8b2109ec..f1181b9c 100644 --- a/spec/httparty_spec.rb +++ b/spec/httparty_spec.rb @@ -876,6 +876,7 @@ def self.inherited(subclass) before do stub_chunked_http_response_with([chunk], options) do |response| allow(response).to receive(:[]).with('content-type').and_return('text/plain; charset = "utf-8"') + allow(response).to receive(:[]).with('content-encoding').and_return(nil) end end From 7268cb5777a505340fe0ba7739012cdceb5bfc57 Mon Sep 17 00:00:00 2001 From: Johnny Shields Date: Wed, 16 Jun 2021 00:21:16 -0700 Subject: [PATCH 2/2] Fix ssl_spec Fix issue with mock --- spec/httparty/ssl_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/httparty/ssl_spec.rb b/spec/httparty/ssl_spec.rb index 7a1dfa62..fb309dbe 100644 --- a/spec/httparty/ssl_spec.rb +++ b/spec/httparty/ssl_spec.rb @@ -40,7 +40,7 @@ it "should work when using ssl_ca_path with a certificate authority" do http = Net::HTTP.new('www.google.com', 443) - response = double(Net::HTTPResponse, :[] => '', body: '', to_hash: {}) + response = double(Net::HTTPResponse, :[] => '', body: '', to_hash: {}, delete: nil) allow(http).to receive(:request).and_return(response) expect(Net::HTTP).to receive(:new).with('www.google.com', 443).and_return(http) expect(http).to receive(:ca_path=).with('/foo/bar')