Add experimental handling of Brotli ("br") content-encoded responses to
Mechanize::HTTP::Agent (used only when the ::Brotli constant is defined),
move the development dependencies from the gemspec into the Gemfile, and add tests.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8f521094..67bc3471 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # Mechanize CHANGELOG
 
+## next / unreleased
+
+* Introduce experimental support for handling Brotli-compressed responses (CRuby only). (#650) @weshatheleopard
+
+
 ## 2.11.0 / 2024-07-18
 
 * The `accept-charset` header is no longer sent. In early versions of Mechanize, circa 2007, this was a common header but now no modern browser sends it, and servers are instructed to ignore it. See #646 for an example of a server that is confused by its presence. (#647) @flavorjones
diff --git a/Gemfile b/Gemfile
index b4e2a20b..8e10e273 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,3 +1,9 @@
 source "https://rubygems.org"
 
 gemspec
+
+gem "minitest", "~> 5.14"
+gem "rake", "~> 13.0"
+gem "rdoc", "~> 6.3"
+gem "rubocop", "~> 1.12"
+gem "brotli", ">= 0.5" unless RUBY_PLATFORM == "java"
diff --git a/lib/mechanize/http/agent.rb b/lib/mechanize/http/agent.rb
index 894cb06e..199eaa0f 100644
--- a/lib/mechanize/http/agent.rb
+++ b/lib/mechanize/http/agent.rb
@@ -495,6 +495,34 @@ def content_encoding_inflate body_io
     body_io.close
   end
 
+  ##
+  # Decodes a Brotli-encoded +body_io+
+  #
+  # (Experimental, CRuby only) Although Mechanize will never request a Brotli-encoded response via
+  # `accept-encoding`, buggy servers may return brotli-encoded responses anyway. Let's try to handle
+  # that case if the Brotli gem is loaded.
+  #
+  # If you need to handle Brotli-encoded responses, install the 'brotli' gem and require it in your
+  # application. If the `Brotli` constant is defined, Mechanize will attempt to use it to inflate
+  # the response.
+  #
+  def content_encoding_brotli(body_io)
+    log.debug('deflate brotli body') if log
+
+    unless defined?(::Brotli)
+      raise Mechanize::Error, "cannot deflate brotli-encoded response. Please install and require the 'brotli' gem."
+    end
+
+    begin
+      return StringIO.new(Brotli.inflate(body_io.read))
+    rescue Brotli::Error
+      log.error("unable to brotli-inflate response") if log
+      raise Mechanize::Error, "error inflating brotli-encoded response."
+    end
+  ensure
+    body_io.close
+  end
+
   def disable_keep_alive request
     request['connection'] = 'close' unless @keep_alive
   end
@@ -831,6 +859,8 @@ def response_content_encoding response, body_io
                content_encoding_inflate body_io
              when 'gzip', 'x-gzip' then
                content_encoding_gunzip body_io
+             when 'br' then
+               content_encoding_brotli body_io
              else
                raise Mechanize::Error,
                  "unsupported content-encoding: #{response['Content-Encoding']}"
diff --git a/mechanize.gemspec b/mechanize.gemspec
index 004d35d5..beca19e3 100644
--- a/mechanize.gemspec
+++ b/mechanize.gemspec
@@ -71,9 +71,4 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency("rubyntlm", ">= 0.6.3", "~> 0.6")
   spec.add_runtime_dependency("base64") # removed from bundled gems in 3.4, and needed by rubyntlm (which doesn't declare this dependency)
   spec.add_runtime_dependency("nkf") # removed from bundled gems in 3.4
-
-  spec.add_development_dependency("minitest", "~> 5.14")
-  spec.add_development_dependency("rake", "~> 13.0")
-  spec.add_development_dependency("rdoc", "~> 6.3")
-  spec.add_development_dependency("rubocop", "~> 1.12")
 end
diff --git a/test/test_mechanize_http_agent.rb b/test/test_mechanize_http_agent.rb
index b9cebb72..ea807962 100644
--- a/test/test_mechanize_http_agent.rb
+++ b/test/test_mechanize_http_agent.rb
@@ -2,6 +2,7 @@
 # frozen_string_literal: true
 
 require 'mechanize/test_case'
+require "brotli" unless RUBY_PLATFORM == "java"
 
 class TestMechanizeHttpAgent < Mechanize::TestCase
 
@@ -924,6 +925,46 @@ def @res.content_length() nil end
     assert_equal 'part', body.read
   end
 
+  def test_response_content_encoding_brotli_when_brotli_not_loaded
+    skip("only test this on jruby which doesn't have brotli support") unless RUBY_ENGINE == 'jruby'
+
+    @res.instance_variable_set :@header, 'content-encoding' => %w[br]
+    body_io = StringIO.new("content doesn't matter for this test")
+
+    e = assert_raises(Mechanize::Error) do
+      @agent.response_content_encoding(@res, body_io)
+    end
+    assert_includes(e.message, "cannot deflate brotli-encoded response")
+
+    assert(body_io.closed?)
+  end
+
+  def test_response_content_encoding_brotli
+    skip("jruby does not have brotli support") if RUBY_ENGINE == 'jruby'
+
+    @res.instance_variable_set :@header, 'content-encoding' => %w[br]
+    body_io = StringIO.new(Brotli.deflate("this is compressed by brotli"))
+
+    body = @agent.response_content_encoding(@res, body_io)
+
+    assert_equal("this is compressed by brotli", body.read)
+    assert(body_io.closed?)
+  end
+
+  def test_response_content_encoding_brotli_corrupt
+    skip("jruby does not have brotli support") if RUBY_ENGINE == 'jruby'
+
+    @res.instance_variable_set :@header, 'content-encoding' => %w[br]
+    body_io = StringIO.new("not a brotli payload")
+
+    e = assert_raises(Mechanize::Error) do
+      @agent.response_content_encoding(@res, body_io)
+    end
+    assert_includes(e.message, "error inflating brotli-encoded response")
+    assert_kind_of(Brotli::Error, e.cause)
+    assert(body_io.closed?)
+  end
+
   def test_response_content_encoding_gzip_corrupt
     log = StringIO.new
     logger = Logger.new log