-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8e6c84e
commit 6a161a4
Showing
10 changed files
with
293 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
local cache = require "kong.tools.database_cache"

--- In-memory cache of bot-detection verdicts, plus an index of every key
-- stored so that all entries can be invalidated at once when the plugin
-- configuration changes (see hooks.lua).
local _M = {}

-- Cache key under which the list of all stored verdict keys is kept.
local INDEX = "bot_detection_index"

--- Store a verdict for `key` and record the key in the index.
-- @param key string cache key (api id .. ":" .. user agent)
-- @param value boolean verdict (true = allowed, false = blocked)
function _M.set(key, value)
  cache.set(cache.bot_detection_key(key), value)

  -- Track the key so reset() can delete it later. Skip keys already in
  -- the index: previously every set() appended unconditionally, so
  -- repeated verdicts for the same key grew the index without bound.
  local index_keys = cache.get(INDEX) or {}
  for _, existing in ipairs(index_keys) do
    if existing == key then return end
  end
  index_keys[#index_keys + 1] = key
  cache.set(INDEX, index_keys)
end

--- Retrieve a previously stored verdict, or nil when none is cached.
function _M.get(key)
  return cache.get(cache.bot_detection_key(key))
end

--- Delete every stored verdict and the index itself.
function _M.reset()
  -- The index is absent until the first set(); ipairs(nil) would raise,
  -- so fall back to an empty table.
  local index_keys = cache.get(INDEX) or {}
  for _, key in ipairs(index_keys) do
    cache.delete(cache.bot_detection_key(key))
  end
  cache.delete(INDEX)
end

return _M
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
-- Project-local dependencies (Kong internals plus the stringy library).
local BasePlugin = require "kong.plugins.base_plugin"
local responses = require "kong.tools.responses"
local rules = require "kong.plugins.bot-detection.rules"
local stringy = require "stringy"
local bot_cache = require "kong.plugins.bot-detection.cache"

-- Localize hot-path globals: locals are register accesses, globals are
-- table lookups, and these run on every proxied request.
local ipairs = ipairs
local get_headers = ngx.req.get_headers
local re_match = ngx.re.match

-- Handler "class" extending Kong's base plugin.
local BotDetectionHandler = BasePlugin:extend()

-- Execution priority within Kong's plugin chain.
BotDetectionHandler.PRIORITY = 2500
|
||
--- Read the request's User-Agent header.
-- ngx returns a table when the header appears more than once; that is
-- rejected here. Otherwise returns the header value (nil when absent).
-- @return string|nil user agent, or nil plus an error message
local function get_user_agent()
  local ua = get_headers()["user-agent"]
  if type(ua) ~= "table" then
    return ua
  end
  return nil, "Only one User-Agent header allowed"
end
|
||
--- Constructor: registers this handler under the plugin's name.
function BotDetectionHandler:new()
  BotDetectionHandler.super.new(self, "bot-detection")
end
|
||
--- Access phase: decide whether the request's User-Agent is a bot.
-- Verdicts are cached per (api, user-agent) pair so the regex lists are
-- evaluated at most once per pair. Precedence: cached verdict, then
-- conf.whitelist, then conf.blacklist, then the built-in bot rules.
function BotDetectionHandler:access(conf)
  BotDetectionHandler.super.access(self)

  local user_agent, err = get_user_agent()
  if err then
    return responses.send_HTTP_BAD_REQUEST(err)
  end

  -- No User-Agent header: nothing to inspect, let the request through.
  if not user_agent then return end

  user_agent = stringy.strip(user_agent)

  -- Cache key, per API
  local cache_key = ngx.ctx.api.id .. ":" .. user_agent

  -- A cached verdict short-circuits all regex matching.
  local verdict = bot_cache.get(cache_key)
  if verdict ~= nil then
    if not verdict then
      return responses.send_HTTP_FORBIDDEN()
    end
    return
  end

  -- User-supplied whitelist wins over everything else.
  for _, pattern in ipairs(conf.whitelist or {}) do
    if re_match(user_agent, pattern) then
      bot_cache.set(cache_key, true)
      return
    end
  end

  -- User-supplied blacklist.
  for _, pattern in ipairs(conf.blacklist or {}) do
    if re_match(user_agent, pattern) then
      bot_cache.set(cache_key, false)
      return responses.send_HTTP_FORBIDDEN()
    end
  end

  -- Built-in bot signatures.
  for _, pattern in ipairs(rules.bots) do
    if re_match(user_agent, pattern) then
      bot_cache.set(cache_key, false)
      return responses.send_HTTP_FORBIDDEN()
    end
  end

  -- Matched nothing: remember that this agent is allowed.
  bot_cache.set(cache_key, true)
end

return BotDetectionHandler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
local events = require "kong.core.events"
local bot_cache = require "kong.plugins.bot-detection.cache"

--- Drop every cached verdict when a bot-detection plugin row changes,
-- so updated whitelist/blacklist settings take effect.
local function invalidate(message_t)
  local is_plugin_row = message_t.collection == "plugins"
  if is_plugin_row and message_t.entity.name == "bot-detection" then
    bot_cache.reset()
  end
end

-- Event subscriptions: invalidate receives the event payload directly.
return {
  [events.TYPES.ENTITY_UPDATED] = invalidate
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
-- Built-in User-Agent signatures identifying known bots and crawlers.
-- These are PCRE patterns (matched via ngx.re.match in handler.lua),
-- not Lua patterns; do not edit them as if they were Lua patterns.
-- List taken from https://github.com/ua-parser/uap-core/blob/master/regexes.yaml

return {
  bots = {
    [[(Pingdom.com_bot_version_)(\d+)\.(\d+)]], -- Pingdom
    [[(facebookexternalhit)/(\d+)\.(\d+)]], -- Facebook
    [[Google.*/\+/web/snippet]], -- Google Plus
    [[(Twitterbot)/(\d+)\.(\d+)]], -- Twitter
    [[/((?:Ant-)?Nutch|[A-z]+[Bb]ot|[A-z]+[Ss]pider|Axtaris|fetchurl|Isara|ShopSalad|Tailsweep)[ \-](\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots Pattern '/name-0.0'
    [[(008|Altresium|Argus|BaiduMobaider|BoardReader|DNSGroup|DataparkSearch|EDI|Goodzer|Grub|INGRID|Infohelfer|LinkedInBot|LOOQ|Nutch|PathDefender|Peew|PostPost|Steeler|Twitterbot|VSE|WebCrunch|WebZIP|Y!J-BR[A-Z]|YahooSeeker|envolk|sproose|wminer)/(\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots Pattern 'name/0.0'
    [[(MSIE) (\d+)\.(\d+)([a-z]\d?)?;.* MSIECrawler]], -- MSIECrawler
    [[(Google-HTTP-Java-Client|Apache-HttpClient|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP)(?:[ /](\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]], -- Downloader ...
    [[(1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]+-Agent|AdsBot-Google(?:-[a-z]+)?|altavista|AppEngine-Google|archive.*?\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]+)*|bingbot|BingPreview|blitzbot|BlogBridge|BoardReader(?: [A-Za-z]+)*|boitho.com-dc|BotSeer|\b\w*favicon\w*\b|\bYeti(?:-[a-z]+)?|Catchpoint bot|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher)?|Feed Seeker Bot|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]+-)?Googlebot(?:-[a-zA-Z]+)?|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile)?|IconSurf|IlTrovatore(?:-Setaccio)?|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]+Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masidani_bot|Mediapartners-Google|Microsoft .*? Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media *)?|msrbot|netresearch|Netvibes|NewsGator[^/]*|^NING|Nutch[^/]*|Nymesis|ObjectsSearch|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PlantyNet_WebRobot|Pompos|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slurp|snappy|Speedy Spider|Squrl Java|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|TwitterBot|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]+|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s)? Link Sleuth|Xerka [A-z]+Bot|yacy(?:bot)?|Yahoo[a-z]*Seeker|Yahoo! Slurp|Yandex\w+|YodaoBot(?:-[A-z]+)?|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]], -- Bots
    [[(?:\/[A-Za-z0-9\.]+)? *([A-Za-z0-9 \-_\!\[\]:]*(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]*))/(\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots General matcher 'name/0.0'
    [[(?:\/[A-Za-z0-9\.]+)? *([A-Za-z0-9 _\!\[\]:]*(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]*)) (\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots General matcher 'name 0.0'
    [[((?:[A-z0-9]+|[A-z\-]+ ?)?(?: the )?(?:[Ss][Pp][Ii][Dd][Ee][Rr]|[Ss]crape|[A-Za-z0-9-]*(?:[^C][^Uu])[Bb]ot|[Cc][Rr][Aa][Ww][Ll])[A-z0-9]*)(?:(?:[ /]| v)(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]] -- Bots containing spider|scrape|bot(but not CUBOT)|Crawl
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
-- Plugin configuration schema.
return {
  -- This plugin is API-wide; it cannot be scoped to a single consumer.
  no_consumer = true,
  fields = {
    -- Patterns for User-Agents to always allow (checked before blacklist).
    whitelist = { type = "array" },
    -- Patterns for User-Agents to always block.
    blacklist = { type = "array" }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
local spec_helper = require "spec.spec_helpers"
local http_client = require "kong.tools.http_client"

local PROXY_URL = spec_helper.PROXY_URL
local STUB_GET_URL = PROXY_URL.."/request"

local HELLOWORLD = "HelloWorld"
local FACEBOOK = "facebookexternalhit/1.1"

-- Fixture layout:
--   bot.com  -> plugin with default config (built-in rules only)
--   bot2.com -> plugin with HELLOWORLD blacklisted
--   bot3.com -> plugin with FACEBOOK whitelisted
-- (Suite was previously mislabeled "Logging Plugins" — a copy-paste slip.)
describe("Bot Detection Plugin", function()

  setup(function()
    spec_helper.prepare_db()
    spec_helper.insert_fixtures {
      api = {
        { request_host = "bot.com", upstream_url = "http://mockbin.com" },
        { request_host = "bot2.com", upstream_url = "http://mockbin.com" },
        { request_host = "bot3.com", upstream_url = "http://mockbin.com" }
      },
      plugin = {
        { name = "bot-detection", config = {}, __api = 1 },
        { name = "bot-detection", config = {blacklist = HELLOWORLD}, __api = 2 },
        { name = "bot-detection", config = {whitelist = FACEBOOK}, __api = 3 }
      }
    }

    spec_helper.start_kong()
  end)

  teardown(function()
    spec_helper.stop_kong()
  end)

  it("should not block regular requests", function()
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com" })
    assert.are.equal(200, status)
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36" })
    assert.are.equal(200, status)
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = HELLOWORLD })
    assert.are.equal(200, status)
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "curl/7.43.0" })
    assert.are.equal(200, status)
  end)

  it("should block bots", function()
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "Googlebot/2.1 (+http://www.google.com/bot.html)" })
    assert.are.equal(403, status)
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = FACEBOOK })
    assert.are.equal(403, status)
  end)

  it("should block blacklisted user-agents", function()
    -- BUG FIX: this test previously targeted bot3.com (the whitelist
    -- API) and asserted 200, so the blacklist was never exercised.
    -- HELLOWORLD is blacklisted on bot2.com and must be rejected there.
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot2.com", ["user-agent"] = HELLOWORLD })
    assert.are.equal(403, status)
  end)

  it("should allow whitelisted user-agents", function()
    -- FACEBOOK matches a built-in bot rule, but the whitelist on
    -- bot3.com takes precedence and lets it through.
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot3.com", ["user-agent"] = FACEBOOK })
    assert.are.equal(200, status)
  end)

end)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
local cjson = require "cjson"
local spec_helper = require "spec.spec_helpers"
local http_client = require "kong.tools.http_client"

local PROXY_URL = spec_helper.PROXY_URL
local STUB_GET_URL = PROXY_URL.."/request"
local API_URL = spec_helper.API_URL

-- Upper bound on cache-invalidation polling (30 * 0.5s = 15s).
-- BUG FIX: the previous `repeat ... until status == X` loops would spin
-- forever if the invalidation hook never fired, hanging the whole suite;
-- now a missed invalidation fails the test instead.
local MAX_ATTEMPTS = 30

-- Poll the proxy with `user_agent` until it yields `expected_status`,
-- or raise after MAX_ATTEMPTS tries.
local function wait_for_status(user_agent, expected_status)
  for _ = 1, MAX_ATTEMPTS do
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = user_agent })
    if status == expected_status then return end
    os.execute("sleep 0.5")
  end
  error("timed out waiting for status "..expected_status.." for user-agent "..user_agent)
end

describe("Hooks", function()

  local plugin_id

  setup(function()
    spec_helper.prepare_db()
    spec_helper.insert_fixtures {
      api = {
        { request_host = "bot.com", upstream_url = "http://mockbin.com" }
      },
      plugin = {
        { name = "bot-detection", config = {}, __api = 1 }
      }
    }

    spec_helper.start_kong()

    -- Look up the plugin's id so the tests can PATCH its config.
    local response, status = http_client.get(API_URL.."/apis/bot.com/plugins/")
    assert.equals(200, status)
    plugin_id = cjson.decode(response).data[1].id
    assert.truthy(plugin_id)
  end)

  teardown(function()
    spec_helper.stop_kong()
  end)

  it("should block a newly entered user-agent", function()
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" })
    assert.are.equal(200, status)

    -- Update the plugin: blacklisting the agent must trigger the
    -- ENTITY_UPDATED hook, which drops the cached "allowed" verdict.
    local _, status = http_client.patch(API_URL.."/apis/bot.com/plugins/"..plugin_id, {["config.blacklist"] = "helloworld"})
    assert.are.equal(200, status)

    wait_for_status("helloworld", 403)

    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "helloworld" })
    assert.are.equal(403, status)
  end)

  it("should allow a newly entered user-agent", function()
    -- facebookexternalhit matches a built-in bot rule, so it starts blocked.
    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" })
    assert.are.equal(403, status)

    -- Update the plugin: whitelisting must invalidate the cached verdict.
    local _, status = http_client.patch(API_URL.."/apis/bot.com/plugins/"..plugin_id, {["config.whitelist"] = "facebookexternalhit/1.1"})
    assert.are.equal(200, status)

    wait_for_status("facebookexternalhit/1.1", 200)

    local _, status = http_client.get(STUB_GET_URL, nil, { host = "bot.com", ["user-agent"] = "facebookexternalhit/1.1" })
    assert.are.equal(200, status)
  end)

end)