From 1b94c1b8528267e41076e0bf16bc0b688f1ef350 Mon Sep 17 00:00:00 2001 From: Connor Dunn Date: Sun, 31 May 2015 16:07:43 -0700 Subject: [PATCH 1/4] Add encoding as an option, use charset if available, otherwise make best guess from mime type --- lib/http/chainable.rb | 5 +++++ lib/http/client.rb | 3 ++- lib/http/options.rb | 7 ++++++- lib/http/response.rb | 30 ++++++++++++++++++++++------ lib/http/response/body.rb | 13 ++++++------ spec/lib/http/options/merge_spec.rb | 3 ++- spec/lib/http/response/body_spec.rb | 2 +- spec/lib/http/response_spec.rb | 2 +- spec/lib/http_spec.rb | 24 ++++++++++++++++++++++ spec/support/dummy_server/servlet.rb | 7 +++++++ 10 files changed, 79 insertions(+), 17 deletions(-) diff --git a/lib/http/chainable.rb b/lib/http/chainable.rb index d5fcd411..650115ee 100644 --- a/lib/http/chainable.rb +++ b/lib/http/chainable.rb @@ -180,6 +180,11 @@ def cookies(cookies) branch default_options.with_cookies(cookies) end + # Force a specific encoding for response body + def encoding(encoding) + branch default_options.with_encoding(encoding) + end + # Accept the given MIME type(s) # @param type def accept(type) diff --git a/lib/http/client.rb b/lib/http/client.rb index 210b375b..8d50af7c 100644 --- a/lib/http/client.rb +++ b/lib/http/client.rb @@ -64,7 +64,8 @@ def perform(req, options) @connection.status_code, @connection.http_version, @connection.headers, - Response::Body.new(@connection), + @connection, + options.encoding, req.uri ) diff --git a/lib/http/options.rb b/lib/http/options.rb index 31ec674e..029f802b 100644 --- a/lib/http/options.rb +++ b/lib/http/options.rb @@ -47,7 +47,8 @@ def initialize(options = {}) :ssl => {}, :keep_alive_timeout => 5, :headers => {}, - :cookies => {} + :cookies => {}, + :encoding => nil } opts_w_defaults = defaults.merge(options) @@ -66,6 +67,10 @@ def initialize(options = {}) end end + def_option :encoding do |encoding| + self.encoding = Encoding.find(encoding) + end + %w( proxy params form json body follow response socket_class ssl_socket_class ssl_context ssl diff --git a/lib/http/response.rb b/lib/http/response.rb index dc6064c4..a2fd26b0 100644 --- a/lib/http/response.rb +++ b/lib/http/response.rb @@ -23,12 +23,18 @@ class Response # @return [URI, nil] attr_reader :uri - def initialize(status, version, headers, body, uri = nil) # rubocop:disable ParameterLists - @version = version - @body = body - @uri = uri && HTTP::URI.parse(uri) - @status = HTTP::Response::Status.new status - @headers = HTTP::Headers.coerce(headers || {}) + def initialize(status, version, headers, connection_or_body, encoding = nil, uri = nil) # rubocop:disable ParameterLists + @version = version + @uri = uri && HTTP::URI.parse(uri) + @status = HTTP::Response::Status.new status + @headers = HTTP::Headers.coerce(headers || {}) + @encoding = encoding + + if connection_or_body.is_a? HTTP::Connection + @body = Response::Body.new(connection_or_body, resolved_encoding) + else + @body = connection_or_body + end end # @!method reason @@ -104,5 +110,17 @@ def parse(as = nil) def inspect "#<#{self.class}/#{@version} #{code} #{reason} #{headers.to_h.inspect}>" end + + private + + # Work out what encoding to assume for the body + def resolved_encoding + return @encoding if @encoding + return charset if charset + return Encoding::UTF_8 if mime_type && mime_type.start_with?("text/".freeze) + + Encoding::BINARY + end + end end diff --git a/lib/http/response/body.rb b/lib/http/response/body.rb index 259ff61e..d3fcf1c9 100644 --- a/lib/http/response/body.rb +++ b/lib/http/response/body.rb @@ -9,10 +9,11 @@ class Body include Enumerable def_delegator :to_s, :empty? - def initialize(client) - @client = client - @streaming = nil - @contents = nil + def initialize(client, encoding) + @client = client + @streaming = nil + @contents = nil + @encoding = encoding end # (see HTTP::Client#readpartial) @@ -36,9 +37,9 @@ def to_s begin @streaming = false - @contents = "".force_encoding(Encoding::UTF_8) + @contents = "".force_encoding(@encoding) while (chunk = @client.readpartial) - @contents << chunk.force_encoding(Encoding::ASCII_8BIT) + @contents << chunk.force_encoding(@encoding) end rescue @contents = nil diff --git a/spec/lib/http/options/merge_spec.rb b/spec/lib/http/options/merge_spec.rb index 874779ce..538d9cfc 100644 --- a/spec/lib/http/options/merge_spec.rb +++ b/spec/lib/http/options/merge_spec.rb @@ -55,6 +55,7 @@ :socket_class => described_class.default_socket_class, :ssl_socket_class => described_class.default_ssl_socket_class, :ssl_context => nil, - :cookies => {}) + :cookies => {}, + :encoding => nil) end end diff --git a/spec/lib/http/response/body_spec.rb b/spec/lib/http/response/body_spec.rb index 4dec4312..6e1488d0 100644 --- a/spec/lib/http/response/body_spec.rb +++ b/spec/lib/http/response/body_spec.rb @@ -4,7 +4,7 @@ before { allow(client).to receive(:readpartial) { chunks.shift } } - subject(:body) { described_class.new client } + subject(:body) { described_class.new client, Encoding::UTF_8 } it "streams bodies from responses" do expect(subject.to_s).to eq "Hello, World!" diff --git a/spec/lib/http/response_spec.rb b/spec/lib/http/response_spec.rb index 87ce6660..0f739f25 100644 --- a/spec/lib/http/response_spec.rb +++ b/spec/lib/http/response_spec.rb @@ -2,7 +2,7 @@ let(:body) { "Hello world!" } let(:uri) { "http://example.com/" } let(:headers) { {} } - subject(:response) { HTTP::Response.new 200, "1.1", headers, body, uri } + subject(:response) { HTTP::Response.new 200, "1.1", headers, body, nil, uri } it "includes HTTP::Headers::Mixin" do expect(described_class).to include HTTP::Headers::Mixin diff --git a/spec/lib/http_spec.rb b/spec/lib/http_spec.rb index 258a8287..93e500dc 100644 --- a/spec/lib/http_spec.rb +++ b/spec/lib/http_spec.rb @@ -1,3 +1,5 @@ +# encoding: UTF-8 + require "json" require "support/dummy_server" @@ -164,6 +166,28 @@ end end + context "loading endpoint with charset" do + it "uses charset from headers" do + response = HTTP.get "#{dummy.endpoint}/iso-8859-1" + expect(response.to_s.encoding).to eq(Encoding::ISO8859_1) + expect(response.to_s.encode(Encoding::UTF_8)).to eq("testæ") + end + + context "with encoding option" do + it "respects option" do + response = HTTP.get "#{dummy.endpoint}/iso-8859-1", "encoding" => Encoding::ASCII_8BIT + expect(response.to_s.encoding).to eq(Encoding::ASCII_8BIT) + end + end + end + + context "passing a string encoding type" do + it "finds encoding" do + response = HTTP.get dummy.endpoint, "encoding" => "ascii" + expect(response.to_s.encoding).to eq(Encoding::ASCII) + end + end + context "loading text" do it "is utf-8 encoded" do response = HTTP.get dummy.endpoint diff --git a/spec/support/dummy_server/servlet.rb b/spec/support/dummy_server/servlet.rb index 47ce4d81..55fb30f1 100644 --- a/spec/support/dummy_server/servlet.rb +++ b/spec/support/dummy_server/servlet.rb @@ -1,3 +1,5 @@ +# encoding: UTF-8 + class DummyServer < WEBrick::HTTPServer class Servlet < WEBrick::HTTPServlet::AbstractServlet def self.sockets @@ -129,6 +131,11 @@ def do_#{method.upcase}(req, res) res.body = bytes.pack("c*") end + get "/iso-8859-1" do |_req, res| + res["Content-Type"] = "text/plain; charset=ISO-8859-1" + res.body = "testæ".encode(Encoding::ISO8859_1) + end + get "/cookies" do |req, res| res["Set-Cookie"] = "foo=bar" res.body = req.cookies.map { |c| [c.name, c.value].join ": " }.join("\n") From f40fbda83d9a7d38e419aee5ced22247b5ac4c83 Mon Sep 17 00:00:00 2001 From: Connor Dunn Date: Sun, 31 May 2015 17:18:45 -0700 Subject: [PATCH 2/4] Use opts hash for HTTP::Response.new --- lib/http/client.rb | 12 ++++++------ lib/http/response.rb | 29 +++++++++++++++++++---------- spec/lib/http/client_spec.rb | 11 +++++++++-- spec/lib/http/redirector_spec.rb | 7 ++++++- spec/lib/http/response_spec.rb | 11 ++++++++++- 5 files changed, 50 insertions(+), 20 deletions(-) diff --git a/lib/http/client.rb b/lib/http/client.rb index 8d50af7c..7d39dace 100644 --- a/lib/http/client.rb +++ b/lib/http/client.rb @@ -61,12 +61,12 @@ def perform(req, options) end res = Response.new( - @connection.status_code, - @connection.http_version, - @connection.headers, - @connection, - options.encoding, - req.uri + :status => @connection.status_code, + :version => @connection.http_version, + :headers => @connection.headers, + :connection => @connection, + :encoding => options.encoding, + :uri => req.uri ) @connection.finish_response if req.verb == :head diff --git a/lib/http/response.rb b/lib/http/response.rb index a2fd26b0..07ce0999 100644 --- a/lib/http/response.rb +++ b/lib/http/response.rb @@ -23,17 +23,26 @@ class Response # @return [URI, nil] attr_reader :uri - def initialize(status, version, headers, connection_or_body, encoding = nil, uri = nil) # rubocop:disable ParameterLists - @version = version - @uri = uri && HTTP::URI.parse(uri) - @status = HTTP::Response::Status.new status - @headers = HTTP::Headers.coerce(headers || {}) - @encoding = encoding - - if connection_or_body.is_a? HTTP::Connection - @body = Response::Body.new(connection_or_body, resolved_encoding) + # Inits a new instance + # + # @option opts [Integer] :status Status code + # @option opts [String] :version HTTP version + # @option opts [Hash] :headers + # @option opts [HTTP::Connection] :connection + # @option opts [String] :encoding Encoding to use when reading body + # @option opts [String] :body + # @option opts [String] :uri + def initialize(opts) + @version = opts.fetch(:version) + @uri = opts.include?(:uri) && HTTP::URI.parse(opts.fetch(:uri)) + @status = HTTP::Response::Status.new opts.fetch(:status) + @headers = HTTP::Headers.coerce(opts.fetch(:headers, {})) + @encoding = opts.fetch(:encoding, nil) + + if opts.include?(:connection) + @body = Response::Body.new(opts.fetch(:connection), resolved_encoding) else - @body = connection_or_body + @body = opts.fetch(:body) end end diff --git a/spec/lib/http/client_spec.rb b/spec/lib/http/client_spec.rb index 4ea78294..080f8d73 100644 --- a/spec/lib/http/client_spec.rb +++ b/spec/lib/http/client_spec.rb @@ -26,11 +26,18 @@ def stub(stubs) end def redirect_response(location, status = 302) - HTTP::Response.new(status, "1.1", {"Location" => location}, "") + HTTP::Response.new( + :status => status, + :version => "1.1", + :headers => {"Location" => location}, + :body => "") end def simple_response(body, status = 200) - HTTP::Response.new(status, "1.1", {}, body) + HTTP::Response.new( + :status => status, + :version => "1.1", + :body => body) end describe "following redirects" do diff --git a/spec/lib/http/redirector_spec.rb b/spec/lib/http/redirector_spec.rb index c71ecf15..4ee0e86f 100644 --- a/spec/lib/http/redirector_spec.rb +++ b/spec/lib/http/redirector_spec.rb @@ -1,6 +1,11 @@ RSpec.describe HTTP::Redirector do def simple_response(status, body = "", headers = {}) - HTTP::Response.new(status, "1.1", headers, body) + HTTP::Response.new( + :status => status, + :version => "1.1", + :headers => headers, + :body => body + ) end def redirect_response(status, location) diff --git a/spec/lib/http/response_spec.rb b/spec/lib/http/response_spec.rb index 0f739f25..bd797156 100644 --- a/spec/lib/http/response_spec.rb +++ b/spec/lib/http/response_spec.rb @@ -2,7 +2,16 @@ let(:body) { "Hello world!" } let(:uri) { "http://example.com/" } let(:headers) { {} } - subject(:response) { HTTP::Response.new 200, "1.1", headers, body, nil, uri } + + subject(:response) do + HTTP::Response.new( + :status => 200, + :version => "1.1", + :headers => headers, + :body => body, + :uri => uri + ) + end it "includes HTTP::Headers::Mixin" do expect(described_class).to include HTTP::Headers::Mixin From 8c7c66acbca93ad7d27802a256d615d4f8393fa9 Mon Sep 17 00:00:00 2001 From: Connor Dunn Date: Mon, 1 Jun 2015 10:48:17 -0700 Subject: [PATCH 3/4] Don't guess encoding for text/ mimetype --- lib/http/response.rb | 7 +------ spec/lib/http_spec.rb | 12 ++++++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/lib/http/response.rb b/lib/http/response.rb index 07ce0999..95a1f29b 100644 --- a/lib/http/response.rb +++ b/lib/http/response.rb @@ -124,12 +124,7 @@ def inspect # Work out what encoding to assume for the body def resolved_encoding - return @encoding if @encoding - return charset if charset - return Encoding::UTF_8 if mime_type && mime_type.start_with?("text/".freeze) - - Encoding::BINARY + @encoding || charset || Encoding::BINARY end - end end diff --git a/spec/lib/http_spec.rb b/spec/lib/http_spec.rb index 93e500dc..8d5c1380 100644 --- a/spec/lib/http_spec.rb +++ b/spec/lib/http_spec.rb @@ -162,7 +162,7 @@ context "loading binary data" do it "is encoded as bytes" do response = HTTP.get "#{dummy.endpoint}/bytes" - expect(response.to_s.encoding).to eq(Encoding::ASCII_8BIT) + expect(response.to_s.encoding).to eq(Encoding::BINARY) end end @@ -175,8 +175,8 @@ context "with encoding option" do it "respects option" do - response = HTTP.get "#{dummy.endpoint}/iso-8859-1", "encoding" => Encoding::ASCII_8BIT - expect(response.to_s.encoding).to eq(Encoding::ASCII_8BIT) + response = HTTP.get "#{dummy.endpoint}/iso-8859-1", "encoding" => Encoding::BINARY + expect(response.to_s.encoding).to eq(Encoding::BINARY) end end end @@ -188,10 +188,10 @@ end end - context "loading text" do - it "is utf-8 encoded" do + context "loading text with no charset" do + it "is binary encoded" do response = HTTP.get dummy.endpoint - expect(response.to_s.encoding).to eq(Encoding::UTF_8) + expect(response.to_s.encoding).to eq(Encoding::BINARY) end end From 6de9119934b98d053e21a513ef33819bc66d395d Mon Sep 17 00:00:00 2001 From: Alexey V Zapparov Date: Sun, 13 Dec 2015 19:45:48 +0100 Subject: [PATCH 4/4] Cleanup HTTP::Response --- lib/http/response.rb | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/lib/http/response.rb b/lib/http/response.rb index 95a1f29b..b99530ca 100644 --- a/lib/http/response.rb +++ b/lib/http/response.rb @@ -37,10 +37,10 @@ def initialize(opts) @uri = opts.include?(:uri) && HTTP::URI.parse(opts.fetch(:uri)) @status = HTTP::Response::Status.new opts.fetch(:status) @headers = HTTP::Headers.coerce(opts.fetch(:headers, {})) - @encoding = opts.fetch(:encoding, nil) if opts.include?(:connection) - @body = Response::Body.new(opts.fetch(:connection), resolved_encoding) + encoding = opts[:encoding] || charset || Encoding::BINARY + @body = Response::Body.new(opts.fetch(:connection), encoding) else @body = opts.fetch(:body) end @@ -85,19 +85,15 @@ def content_type @content_type ||= ContentType.parse headers[Headers::CONTENT_TYPE] end - # MIME type of response (if any) - # - # @return [String, nil] - def mime_type - @mime_type ||= content_type.mime_type - end + # @!method mime_type + # MIME type of response (if any) + # @return [String, nil] + def_delegator :content_type, :mime_type - # Charset of response (if any) - # - # @return [String, nil] - def charset - @charset ||= content_type.charset - end + # @!method charset + # Charset of response (if any) + # @return [String, nil] + def_delegator :content_type, :charset def cookies @cookies ||= headers.each_with_object CookieJar.new do |(k, v), jar| @@ -119,12 +115,5 @@ def parse(as = nil) def inspect "#<#{self.class}/#{@version} #{code} #{reason} #{headers.to_h.inspect}>" end - - private - - # Work out what encoding to assume for the body - def resolved_encoding - @encoding || charset || Encoding::BINARY - end end end