Skip to content

Commit

Permalink
Nginx: add rate limiting (internetarchive#9904)
Browse files Browse the repository at this point in the history
* Nginx: add rate limiting

See https://blog.nginx.org/blog/rate-limiting-nginx and
https://nginx.org/en/docs/http/ngx_http_limit_req_module.html

Adds comments in docker compose + web with instructions for keeping nginx and docker replicas in sync

---------

Co-authored-by: Mek <michael.karpeles@gmail.com>
  • Loading branch information
scottbarnes and mekarpeles authored Sep 25, 2024
1 parent 1aae0db commit b428d1f
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 4 deletions.
2 changes: 2 additions & 0 deletions compose.production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ services:
- ../olsystem:/olsystem
- /1:/1
deploy:
# Note: the replicas here must be kept in sync with the `upstream covers_backend`
# value in `docker/covers_nginx.conf`.
replicas: 2

covers_nginx:
Expand Down
29 changes: 27 additions & 2 deletions docker/covers_nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ server {
ssl_prefer_server_ciphers on;
}

# Docker's internal load balancing ends up with unbalanced connections eventually.
# This must be kept in sync with the `replicas` value in `compose.production.yaml`
# for the `covers` service.
upstream covers_backend {
# Send each request to the backend with the fewest active connections.
least_conn;
# One `server` entry per `covers` replica; add or remove entries when the
# replica count changes.
server openlibrary-covers-1:7075;
server openlibrary-covers-2:7075;
}

server {
listen 80;
listen 443;
Expand All @@ -25,8 +34,15 @@ server {

keepalive_timeout 5;

# Return 429 errors as JSON.
error_page 429 = @429;
location @429 {
# Named location that `error_page 429` redirects to; it answers with a
# JSON body rather than nginx's default error page.
default_type application/json;
return 429 '{"status": 429, "message": "Too Many Requests. Please email us at info@archive.org"}';
}

location / {
proxy_pass http://covers:7075;
proxy_pass http://covers_backend;
proxy_set_header Host $http_host;

# Gunicorn takes IP from this header
Expand All @@ -37,8 +53,17 @@ server {
proxy_set_header X-Scheme $scheme;

if ($http_user_agent ~ (Bytespider) ) {
return 429;
return 444;
}

if ($http_user_agent ~ (CloudFront) ) {
return 444;
}


# Covers rate limit.
limit_req zone=cover_limit burst=400 nodelay;
limit_req_status 429;
}

location ^~ /.well-known/acme-challenge/ {
Expand Down
21 changes: 20 additions & 1 deletion docker/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ error_log /var/log/nginx/error.log;
pid /var/run/nginx.pid;

events {
worker_connections 1024;
worker_connections 2048;
# multi_accept on;
}

Expand Down Expand Up @@ -44,6 +44,25 @@ http {
# Black-listed IPs
include /olsystem/etc/nginx/deny.conf;

# Rate limiting: https://nginx.org/en/docs/http/ngx_http_limit_req_module.html
# No rate limit when IP obfuscation is not applied, as every IP is 255.0.0.0.
# These rules only take effect where a `limit_req` directive references one
# of the zones below, i.e., in `docker/web_nginx.conf` and
# `docker/covers_nginx.conf`. To disable rate limiting, remove `limit_req` there.
geo $should_apply_limit {
# 0 = do not rate limit requests whose client address is 255.0.0.0.
255.0.0.0 0;
# 1 = rate limit every other client address.
default 1;
}

map $should_apply_limit $rate_limit_key {
# Empty key: limit_req does not account requests whose key is empty.
0 '';
# Otherwise key the limit on the client address (compact binary form).
1 $binary_remote_addr;
}

# Each zone is a 10 MB shared-memory area keyed by $rate_limit_key.
# web_limit: 200 requests per minute per key.
limit_req_zone $rate_limit_key zone=web_limit:10m rate=200r/m;
# Set a more permissive limit for covers because some pages might load 20+ covers.
# cover_limit: 400 requests per minute per key.
limit_req_zone $rate_limit_key zone=cover_limit:10m rate=400r/m;

# Things are mounted into here by the docker compose file
include /etc/nginx/sites-enabled/*;
}
15 changes: 14 additions & 1 deletion docker/web_nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ server {
if ($api_call = "http:noapi") {
rewrite ^(.*)$ https://$http_host$1 last;
}

# Return 429 errors as JSON.
error_page 429 = @429;
location @429 {
# Named location that `error_page 429` redirects to; it answers with a
# JSON body rather than nginx's default error page.
default_type application/json;
return 429 '{"status": 429, "message": "Too Many Requests. Consider using https://openlibrary.org/developers/dumps."}';
}

location / {
proxy_pass http://webnodes;
proxy_set_header Host $http_host;
Expand All @@ -76,8 +84,13 @@ server {
proxy_set_header X-Scheme $scheme;

if ($http_user_agent ~ (Bytespider) ) {
return 429;
return 444;
}


# Web rate limit.
limit_req zone=web_limit burst=200 nodelay;
limit_req_status 429;
}

location ^~ /.well-known/acme-challenge/ {
Expand Down

0 comments on commit b428d1f

Please sign in to comment.