From e56028196877f0d54b22c1ee5018d1fa1cb3a72f Mon Sep 17 00:00:00 2001 From: "Ben W. Brumfield" Date: Mon, 27 May 2024 12:16:17 -0500 Subject: [PATCH] Add spiders and handle no user agent for capybara on #4130 --- config/initializers/rack_attack.rb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/config/initializers/rack_attack.rb b/config/initializers/rack_attack.rb index 318d19ed71..5ee809953b 100644 --- a/config/initializers/rack_attack.rb +++ b/config/initializers/rack_attack.rb @@ -31,18 +31,17 @@ class Rack::Attack # see https://github.com/benwbrum/fromthepage/issues/4130 ### Throttle requests by agent ClaudeBot throttle('requests by agent ClaudeBot', limit: 6, period: 1.minute) do |req| - req.user_agent.match?(/ClaudeBot/) + req.user_agent&.match?(/ClaudeBot/) end ### Throttle requests by agent ByteDance throttle('requests by agent ByteDance', limit: 6, period: 1.minute) do |req| - req.user_agent.match?(/Bytespider/) + req.user_agent&.match?(/Bytespider/) end ### Throttle requests by low-rent SEO bots - throttle('requests by agent ByteDance', limit: 6, period: 1.minute) do |req| - req.user_agent.match?(/SemrushBot/) || - req.user_agent.match?(/DataForSeoBot/) + throttle('requests by various SEO bots', limit: 15, period: 1.minute) do |req| + req.user_agent&.match?(/(SemrushBot|AhrefsBot|DataForSeoBot|AhrefsBot|DotBot|MJ12bot|PetalBot)/) end