diff --git a/app/mailers/request_mailer.rb b/app/mailers/request_mailer.rb index fa42e853f9..47daf34ee9 100644 --- a/app/mailers/request_mailer.rb +++ b/app/mailers/request_mailer.rb @@ -235,35 +235,47 @@ def requests_matching_email(email) InfoRequest.matching_incoming_email(addresses) end + def send_to_holding_pen(email, raw_email, opts) + opts[:rejected_reason] = + _("Could not identify the request from the email address") + request = InfoRequest.holding_pen_request + request.receive(email, raw_email, opts) + end + # Member function, called on the new class made in self.receive above def receive(email, raw_email, source = :mailin) opts = { source: source } - # Find which info requests the email is for - reply_info_requests = requests_matching_email(email) + # Only check mail that doesn't have spam in the header + return if SpamAddress.spam?(MailHandler.get_all_addresses(email)) - # Nothing found OR multiple different info requests, so save in holding pen - if reply_info_requests.empty? || reply_info_requests.count > 1 - opts[:rejected_reason] = - _("Could not identify the request from the email address") - request = InfoRequest.holding_pen_request + # Find exact matches for info requests + exact_info_requests = requests_matching_email(email) - unless SpamAddress.spam?(MailHandler.get_all_addresses(email)) - request.receive(email, raw_email, opts) - end - return + # Find any guesses for info requests + unless exact_info_requests.count == 1 + guessed_info_requests = Guess.guessed_info_requests(email) end - # Send the message to each request, to be archived with it - reply_info_requests.each do |reply_info_request| - # If environment variable STOP_DUPLICATES is set, don't send message with same id again - if ENV['STOP_DUPLICATES'] - if reply_info_request.already_received?(email, raw_email) - raise "message #{ email.message_id } already received by request" - end + # If there is only one info request matching mail, it gets attached to the + # request to be archived with it + if exact_info_requests.count == 1 || guessed_info_requests.count == 1 + info_request = exact_info_requests.first || guessed_info_requests.first + + if exact_info_requests.empty? && guessed_info_requests.count == 1 + info_request.log_event( + 'redeliver_incoming', + editor: 'automatic', + destination_request: info_request + ) end - reply_info_request.receive(email, raw_email, opts) + info_request.receive(email, raw_email, opts) + + else + # Otherwise, if there are no matching IRs, multiple IRs, or multiple IR + # guesses, we send the mail to the holding pen + send_to_holding_pen(email, raw_email, opts) end end diff --git a/app/models/guess.rb b/app/models/guess.rb new file mode 100644 index 0000000000..f75eafbf70 --- /dev/null +++ b/app/models/guess.rb @@ -0,0 +1,64 @@ +require 'text' + +## +# A guess at which info request a incoming message should be associated to +# +class Guess + attr_reader :info_request, :components + + # The percentage similarity the id or idhash much fulfil + THRESHOLD = 0.8 + + ## + # Return InfoRequest which we guess should receive an incoming message based + # on a threshold. + # + def self.guessed_info_requests(email) + # Match the email address in the message without matching the hash + email_addresses = MailHandler.get_all_addresses(email) + guesses = InfoRequest.guess_by_incoming_email(email_addresses) + + guesses_reaching_threshold = guesses.select do |ir_guess| + id_score = ir_guess.id_score + idhash_score = ir_guess.idhash_score + + (id_score == 1 && idhash_score >= THRESHOLD) || + (id_score >= THRESHOLD && idhash_score == 1) + end + + guesses_reaching_threshold.map(&:info_request).uniq + end + + def initialize(info_request, **components) + @info_request = info_request + @components = components + end + + def [](key) + components[key] + end + + def id_score + return 1 unless self[:id] + similarity(self[:id], info_request.id) + end + + def idhash_score + return 1 unless self[:idhash] + similarity(self[:idhash], info_request.idhash) + end + + def ==(other) + info_request == other.info_request && components == other.components + end + + def match_method + components.keys.first + end + + private + + def similarity(a, b) + Text::WhiteSimilarity.similarity(a.to_s, b.to_s) + end +end diff --git a/app/models/info_request.rb b/app/models/info_request.rb index 2175f27dbe..4c1b25334c 100644 --- a/app/models/info_request.rb +++ b/app/models/info_request.rb @@ -40,7 +40,6 @@ require 'fileutils' class InfoRequest < ApplicationRecord - Guess = Struct.new(:info_request, :matched_value, :match_method).freeze OLD_AGE_IN_DAYS = 21.days include Rails.application.routes.url_helpers @@ -245,8 +244,13 @@ def self.guess_by_incoming_email(*emails) guesses = emails.flatten.reduce([]) do |memo, email| id, idhash = _extract_id_hash_from_email(email) id, idhash = _guess_idhash_from_email(email) if idhash.nil? || id.nil? - memo << Guess.new(find_by_id(id), email, :id) - memo << Guess.new(find_by_idhash(idhash), email, :idhash) + + memo << Guess.new( + find_by_id(id), email: email, id: id, idhash: idhash + ) + memo << Guess.new( + find_by_idhash(idhash), email: email, id: id, idhash: idhash + ) end # Unique Guesses where we've found an `InfoRequest` @@ -315,7 +319,7 @@ def self.guess_by_incoming_subject(subject_line) limit(25) guesses = requests.each.reduce([]) do |memo, request| - memo << Guess.new(request, subject_line, :subject) + memo << Guess.new(request, subject: subject_line) end # Unique Guesses where we've found an `InfoRequest` diff --git a/app/views/admin_raw_email/_holding_pen.html.erb b/app/views/admin_raw_email/_holding_pen.html.erb index 853c22c27e..fd5ec87113 100644 --- a/app/views/admin_raw_email/_holding_pen.html.erb +++ b/app/views/admin_raw_email/_holding_pen.html.erb @@ -54,12 +54,12 @@ <%= guess.match_method %> <% if guess.match_method == :subject %> because the incoming message has a subject of - <%= guess.matched_value %> + <%= guess[:subject] %> <% else %> because it has an incoming email address of <%= guess.info_request.incoming_email %> and this incoming message was sent to - <%= guess.matched_value %>. + <%= guess[:email] %>. <% end %>

diff --git a/spec/controllers/admin_raw_email_controller_spec.rb b/spec/controllers/admin_raw_email_controller_spec.rb index ef923f2f42..3bf0025805 100644 --- a/spec/controllers/admin_raw_email_controller_spec.rb +++ b/spec/controllers/admin_raw_email_controller_spec.rb @@ -63,7 +63,10 @@ it 'assigns guessed requests based on the hash' do get :show, params: { id: incoming_message.raw_email.id } - guess = InfoRequest::Guess.new(info_request, invalid_to, :idhash) + idhash = InfoRequest.hash_from_id(info_request.id) + guess = Guess.new( + info_request, email: invalid_to, id: nil, idhash: idhash + ) expect(assigns[:guessed_info_requests]).to eq([guess]) end @@ -72,7 +75,7 @@ FactoryBot.create(:incoming_message, subject: 'Basic Email'). info_request get :show, params: { id: incoming_message.raw_email.id } - guess = InfoRequest::Guess.new(other_request, 'Basic Email', :subject) + guess = Guess.new(other_request, subject: 'Basic Email') expect(assigns[:guessed_info_requests]).to include(guess) end diff --git a/spec/integration/admin_spec.rb b/spec/integration/admin_spec.rb index e298236a4a..edfc4c0a96 100644 --- a/spec/integration/admin_spec.rb +++ b/spec/integration/admin_spec.rb @@ -127,26 +127,6 @@ expect(page).to have_content "Only the authority can reply to this request" end end - - it "guesses a misdirected request" do - info_request = FactoryBot.create(:info_request, - allow_new_responses_from: 'authority_only', - handle_rejected_responses: 'holding_pen') - mail_to = "request-#{info_request.id}-asdfg@example.com" - receive_incoming_mail('incoming-request-plain.email', email_to: mail_to) - interesting_email = last_holding_pen_mail - - # now we add another message to the queue, which we're not interested in - receive_incoming_mail('incoming-request-plain.email', - email_to: info_request.incoming_email, - email_from: "") - expect(holding_pen_messages.length).to eq(2) - using_session(@admin) do - visit admin_raw_email_path interesting_email - expect(page).to have_content "Could not identify the request" - expect(page).to have_content info_request.title - end - end end describe 'generating an upload url' do diff --git a/spec/mailers/request_mailer_spec.rb b/spec/mailers/request_mailer_spec.rb index b2563bb73c..3cd72d6b3d 100644 --- a/spec/mailers/request_mailer_spec.rb +++ b/spec/mailers/request_mailer_spec.rb @@ -24,6 +24,25 @@ deliveries.clear end + it "should append it to the appropriate request if there is only one guess of information request" do + ir = info_requests(:fancy_dog_request) + expect(ir.incoming_messages.count).to eq(1) # in the fixture + receive_incoming_mail( + 'incoming-request-plain.email', + email_to: "request-#{ir.id}-#{ir.idhash}a@localhost" + ) + expect(ir.incoming_messages.count).to eq(2) # one more arrives + expect(ir.info_request_events[-1].incoming_message_id).not_to be_nil + expect(ir.info_request_events[-2].params[:editor]).to eq("automatic") + + deliveries = ActionMailer::Base.deliveries + expect(deliveries.size).to eq(1) + mail = deliveries[0] + # to the user who sent fancy_dog_request + expect(mail.to).to eq(['bob@localhost']) + deliveries.clear + end + it "should store mail in holding pen and send to admin when the email is not to any information request" do ir = info_requests(:fancy_dog_request) expect(ir.incoming_messages.count).to eq(1) diff --git a/spec/models/guess_spec.rb b/spec/models/guess_spec.rb new file mode 100644 index 0000000000..d579bba0ae --- /dev/null +++ b/spec/models/guess_spec.rb @@ -0,0 +1,95 @@ +require 'spec_helper' + +RSpec.describe Guess do + let(:info_request) do + FactoryBot.create(:info_request, id: 100, idhash: '4e637388') + end + + describe '.guessed_info_requests' do + subject(:guesses) { described_class.guessed_info_requests(email) } + + let(:email) do + mail = Mail.new + mail.to address + mail + end + + let(:info_request) { FactoryBot.create(:info_request, id: 4566) } + let!(:other_info_request) { FactoryBot.create(:info_request) } + + let(:id) { info_request.id } + let(:hash) { info_request.idhash } + + context 'with email matching ID and ID hash' do + let(:address) { info_request.incoming_email } + + it 'return matching InfoRequest' do + is_expected.to match_array([info_request]) + end + end + + context 'with email matching ID and almost ID hash' do + let(:address) { "request-#{id}-#{hash[0...-1]}}z@localhost" } + + it 'return guessed InfoRequest' do + is_expected.to match_array([info_request]) + end + end + + context 'with email matching ID hash and almost ID' do + let(:address) { "request-#{id.to_s[0...-1]}-#{hash}@localhost" } + + it 'return guessed InfoRequest' do + is_expected.to match_array([info_request]) + end + end + end + + describe 'with a subject line given' do + let(:guess) { described_class.new(info_request, subject: 'subject_line') } + + it 'returns an id_score of 1' do + expect(guess.id_score).to eq(1) + end + + it 'returns an idhash_score of 1' do + expect(guess.idhash_score).to eq(1) + end + end + + describe 'with an id given' do + let(:guess_1) { described_class.new(info_request, id: 100) } + let(:guess_2) { described_class.new(info_request, id: 456) } + let(:guess_3) { described_class.new(info_request, id: 109) } + + it 'returns an id_score of 1 when it is correct' do + expect(guess_1.id_score).to eq(1.0) + end + + it 'returns an id_score of 0 when there is no similarity' do + expect(guess_2.id_score).to eq(0.0) + end + + it 'returns a value between 0 and 1 when there is some similarity' do + expect(guess_3.id_score).to be_between(0, 1).exclusive + end + end + + describe 'with an idhash given' do + let(:guess_1) { described_class.new(info_request, idhash: '4e637388') } + let(:guess_2) { described_class.new(info_request, idhash: '12345678') } + let(:guess_3) { described_class.new(info_request, idhash: '4e637399') } + + it 'returns an idhash_score of 1 when it is correct' do + expect(guess_1.idhash_score).to eq(1.0) + end + + it 'returns an idhash_score of 0 when there is no similarity' do + expect(guess_2.idhash_score).to eq(0.0) + end + + it 'returns a value between 0 and 1 when there is some similarity' do + expect(guess_3.idhash_score).to be_between(0, 1).exclusive + end + end +end diff --git a/spec/models/info_request_spec.rb b/spec/models/info_request_spec.rb index b5ab265ac7..6a0fd5920c 100644 --- a/spec/models/info_request_spec.rb +++ b/spec/models/info_request_spec.rb @@ -1910,7 +1910,16 @@ context 'email with an intact id and broken idhash' do let(:email) { "request-#{ info_request.id }-asdfg@example.com" } - let(:guess) { described_class::Guess.new(info_request, email, :id) } + + let(:guess) do + Guess.new( + info_request, + email: email, + id: info_request.id, + idhash: 'asdfg' + ) + end + it { is_expected.to include(guess) } end @@ -1919,7 +1928,15 @@ "request-#{ info_request.id }ab-#{ info_request.idhash }@example.com" end - let(:guess) { described_class::Guess.new(info_request, email, :idhash) } + let(:guess) do + Guess.new( + info_request, + email: email, + id: nil, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end @@ -1928,13 +1945,30 @@ "request-a12x3b-#{ info_request.idhash }@example.com" end - let(:guess) { described_class::Guess.new(info_request, email, :idhash) } + let(:guess) do + Guess.new( + info_request, + email: email, + id: nil, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end context 'upper case email with a broken id and otherwise intact idhash' do let(:email) { "REQUEST-123a-#{ info_request.idhash.upcase }@example.com" } - let(:guess) { described_class::Guess.new(info_request, email, :idhash) } + + let(:guess) do + Guess.new( + info_request, + email: email, + id: nil, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end @@ -1943,7 +1977,15 @@ "request#{ info_request.id }#{ info_request.idhash }@example.com" end - let(:guess) { described_class::Guess.new(info_request, email, :id) } + let(:guess) do + Guess.new( + info_request, + email: email, + id: info_request.id, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end @@ -1952,19 +1994,45 @@ "request-#{ info_request.id }#{ info_request.idhash }@example.com" end - let(:guess) { described_class::Guess.new(info_request, email, :id) } + let(:guess) do + Guess.new( + info_request, + email: email, + id: info_request.id, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end context 'email with a broken id and an intact idhash but missing punctuation' do let(:email) { "request123ab#{ info_request.idhash }@example.com" } - let(:guess) { described_class::Guess.new(info_request, email, :idhash) } + + let(:guess) do + Guess.new( + info_request, + email: email, + id: nil, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end context 'email with an intact id and a broken idhash but missing punctuation' do let(:email) { "request#{ info_request.id }abcdefgh@example.com" } - let(:guess) { described_class::Guess.new(info_request, email, :id) } + + let(:guess) do + Guess.new( + info_request, + email: email, + id: info_request.id, + idhash: 'abcdefgh' + ) + end + it { is_expected.to include(guess) } end @@ -1972,13 +2040,31 @@ before { InfoRequest.where(id: 1_231_014).destroy_all } let!(:info_request) { FactoryBot.create(:info_request, id: 1_231_014) } let(:email) { 'request-123loL4abcdefgh@example.com' } - let(:guess) { described_class::Guess.new(info_request, email, :id) } + + let(:guess) do + Guess.new( + info_request, + email: email, + id: info_request.id, + idhash: 'abcdefgh' + ) + end + it { is_expected.to include(guess) } end context 'email with a broken id and an intact idhash but broken format' do let(:email) { "reqeust=123ab#{ info_request.idhash }@example.com" } - let(:guess) { described_class::Guess.new(info_request, email, :idhash) } + + let(:guess) do + Guess.new( + info_request, + email: email, + id: nil, + idhash: info_request.idhash + ) + end + it { is_expected.to include(guess) } end @@ -2000,10 +2086,22 @@ "request-#{ info_request_1.id }-#{ info_request_2.idhash }@example.com" end - let(:guess_1) { described_class::Guess.new(info_request_1, email, :id) } + let(:guess_1) do + Guess.new( + info_request_1, + email: email, + id: info_request_1.id, + idhash: info_request_2.idhash + ) + end let(:guess_2) do - described_class::Guess.new(info_request_2, email, :idhash) + Guess.new( + info_request_2, + email: email, + id: info_request_1.id, + idhash: info_request_2.idhash + ) end it { is_expected.to match_array([guess_1, guess_2]) } @@ -2013,7 +2111,15 @@ let(:email_1) { "request-123ab-#{ info_request.idhash }@example.com" } let(:email_2) { "request-#{ info_request.id }-asdfg@example.com" } let(:email) { [email_1, email_2] } - let(:guess) { described_class::Guess.new(info_request, email_1, :idhash) } + + let(:guess) do + Guess.new( + info_request, + email: email_1, + id: nil, + idhash: info_request.idhash + ) + end it { is_expected.to match_array([guess]) } end @@ -2032,10 +2138,22 @@ let(:email) { [email_1, email_2] } - let(:guess_1) { described_class::Guess.new(info_request_1, email_1, :id) } + let(:guess_1) do + Guess.new( + info_request_1, + email: email_1, + id: info_request_1.id, + idhash: info_request_2.idhash + ) + end let(:guess_2) do - described_class::Guess.new(info_request_2, email_1, :idhash) + Guess.new( + info_request_2, + email: email_1, + id: info_request_1.id, + idhash: info_request_2.idhash + ) end it { is_expected.to match_array([guess_1, guess_2]) } @@ -2055,9 +2173,7 @@ context 'a direct reply to the original request email' do let(:subject_line) { info_request.email_subject_followup } - let(:guess) do - described_class::Guess.new(info_request, subject_line, :subject) - end + let(:guess) { Guess.new(info_request, subject: subject_line) } it { is_expected.to include(guess) } end @@ -2067,9 +2183,7 @@ info_request.email_subject_followup.gsub('Re: ', 'RE: ') end - let(:guess) do - described_class::Guess.new(info_request, subject_line, :subject) - end + let(:guess) { Guess.new(info_request, subject: subject_line) } it { is_expected.to include(guess) } end @@ -2087,13 +2201,8 @@ 'Re: Freedom of Information request - How many jelly beans?' end - let(:guess_1) do - described_class::Guess.new(info_request_1, subject_line, :subject) - end - - let(:guess_2) do - described_class::Guess.new(info_request_2, subject_line, :subject) - end + let(:guess_1) { Guess.new(info_request_1, subject: subject_line) } + let(:guess_2) { Guess.new(info_request_2, subject: subject_line) } it { is_expected.to match_array([guess_1, guess_2]) } end @@ -2114,10 +2223,7 @@ end let(:subject_line) { 'Our ref: 12345678' } - - let(:guess) do - described_class::Guess.new(info_request, subject_line, :subject) - end + let(:guess) { Guess.new(info_request, subject: subject_line) } it { is_expected.to include(guess) } end @@ -2132,9 +2238,7 @@ end let(:guess) do - described_class::Guess.new(InfoRequest.holding_pen_request, - subject_line, - :subject) + Guess.new(InfoRequest.holding_pen_request, subject: subject_line) end it { is_expected.to_not include(guess) } @@ -2148,9 +2252,7 @@ info_request: info_request) end - let(:guess) do - described_class::Guess.new(info_request, subject_line, :subject) - end + let(:guess) { Guess.new(info_request, subject: subject_line) } it { is_expected.to include(guess) } end @@ -2166,13 +2268,8 @@ FactoryBot.create(:incoming_message, subject: subject_line).info_request end - let(:guess_1) do - described_class::Guess.new(info_request_1, subject_line, :subject) - end - - let(:guess_2) do - described_class::Guess.new(info_request_2, subject_line, :subject) - end + let(:guess_1) { Guess.new(info_request_1, subject: subject_line) } + let(:guess_2) { Guess.new(info_request_2, subject: subject_line) } it { is_expected.to match_array([guess_1, guess_2]) } end