Skip to content

Commit

Permalink
Scrape email addresses from URL (#51)
Browse files Browse the repository at this point in the history
  • Loading branch information
AI-Mozi authored Aug 3, 2023
1 parent 733019c commit e32f5da
Show file tree
Hide file tree
Showing 3 changed files with 133 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .ruby-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ruby-3.1
ruby-3.1
68 changes: 68 additions & 0 deletions lib/ronin/recon/builtin/web/email_addresses.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# frozen_string_literal: true
#
# ronin-recon - A micro-framework and tool for performing reconnaissance.
#
# Copyright (c) 2023 Hal Brodigan (postmodern.mod3@gmail.com)
#
# ronin-recon is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-recon is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with ronin-recon. If not, see <https://www.gnu.org/licenses/>.
#

require 'ronin/recon/web_worker'
require 'ronin/recon/builtin/web/spider'
require 'ronin/support/text/patterns'

module Ronin
  module Recon
    module Web
      #
      # A recon worker that extracts email addresses from the body of a
      # web page.
      #
      class EmailAddresses < WebWorker

        register 'web/email_addresses'

        accepts URL

        summary 'Extracts emails from a website'

        description <<~DESC
          Extracts all emails from a website.
        DESC

        #
        # Extracts email addresses found in the page's body.
        #
        # @param [Values::URL] url
        #   The URL of the page to extract email addresses from.
        #
        # @yield [email]
        #   Each email address found on the page will be yielded.
        #
        # @yieldparam [Values::EmailAddress] email
        #   Email address found on the page.
        #
        # @return [nil]
        #   `nil` is returned when the URL has no body. Otherwise results are
        #   delivered through the block and the return value is not meaningful.
        #
        def process(url)
          body = url.body
          return nil unless body

          # Work on a copy: String#force_encoding re-tags its receiver in
          # place, which would alter the body string held by the URL value
          # and raises FrozenError if that string is frozen.
          text = body.dup.force_encoding(Encoding::UTF_8)

          # Matching a Regexp against a String containing invalid byte
          # sequences raises ArgumentError, so replace any bytes that are not
          # valid UTF-8 (e.g. binary or Latin-1 responses) before scanning.
          text.scrub! unless text.valid_encoding?

          text.scan(Ronin::Support::Text::Patterns::EMAIL_ADDRESS) do |email|
            yield EmailAddress.new(email)
          end
        end

      end
    end
  end
end
64 changes: 64 additions & 0 deletions spec/builtin/web/email_addresses_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
require 'spec_helper'
require 'ronin/recon/builtin/web/email_addresses'
require 'ronin/recon/values/url'

describe Ronin::Recon::Web::EmailAddresses do
  describe "#process" do
    # Collects every value yielded by #process during an example.
    let(:emails) { [] }

    context "when URL #body exists" do
      # `body` is defined by each nested context below.
      let(:url) { Ronin::Recon::Values::URL.new("example.com", body: body) }

      context "and email is present" do
        let(:body) do
          <<~HTML
            <html>
            <body>
            <p>example@example.com</p>
            <p>example1@example.com</p>
            </body>
            </html>
          HTML
        end
        let(:valid_emails) do
          %w[example@example.com example1@example.com].map do |address|
            Ronin::Recon::Values::EmailAddress.new(address)
          end
        end

        it "must return array of EmailAddresses" do
          subject.process(url) { |email| emails << email }

          expect(emails).to eq(valid_emails)
        end
      end

      context "and email is not present" do
        let(:body) do
          <<~HTML
            <html>
            <body>
            <p>without email</p>
            </body>
            </html>
          HTML
        end

        it "must return empty array" do
          subject.process(url) { |email| emails << email }

          expect(emails).to eq([])
        end
      end
    end

    context "when url body is nil" do
      # A URL value constructed without a body.
      let(:url) { Ronin::Recon::Values::URL.new("example.com") }

      it "must return nil" do
        result = subject.process(url)

        expect(result).to be(nil)
      end
    end
  end
end

0 comments on commit e32f5da

Please sign in to comment.