# Patch summary (preamble text before the first `diff --git` header; not part of any hunk).
#
# Purpose: add nginx request rate limiting for the web and covers servers, return
# 429 responses as JSON, and load-balance covers traffic through an explicit upstream.
#
# compose.production.yaml
#   - Adds a note above `replicas: 2` saying the value must be kept in sync with
#     `upstream covers_backend` in `docker/covers_nginx.conf`.
#
# docker/covers_nginx.conf
#   - Adds `upstream covers_backend` using `least_conn` over
#     `openlibrary-covers-1:7075` and `openlibrary-covers-2:7075`.
#   - `location /` now proxies to `http://covers_backend` instead of `http://covers:7075`.
#   - Adds `error_page 429 = @429` plus a named location `@429` that returns a JSON body.
#   - User agents matching `Bytespider` now get `return 444` instead of `return 429`;
#     user agents matching `CloudFront` also get `return 444`.
#     (Per the nginx docs, 444 closes the connection without sending a response.)
#   - Adds `limit_req zone=cover_limit burst=400 nodelay` with `limit_req_status 429`.
#
# docker/nginx.conf
#   - Raises `worker_connections` from 1024 to 2048.
#   - Adds `geo $should_apply_limit` (0 for 255.0.0.0, 1 otherwise) and a `map` that
#     turns it into `$rate_limit_key`: '' when 0, `$binary_remote_addr` when 1.
#     (Per the nginx docs, requests with an empty key are not accounted by limit_req.)
#   - Declares two zones keyed on `$rate_limit_key`: `web_limit` (10m, 200r/m) and
#     `cover_limit` (10m, 400r/m).
#
# docker/web_nginx.conf
#   - Adds the same `error_page 429 = @429` / `location @429` JSON handler, with a
#     message pointing at https://openlibrary.org/developers/dumps.
#   - User agents matching `Bytespider` now get `return 444` instead of `return 429`.
#   - Adds `limit_req zone=web_limit burst=200 nodelay` with `limit_req_status 429`.
#
# NOTE(review): the two hard-coded `server` entries in `upstream covers_backend` and
# `replicas: 2` are coupled only by comments; changing one without the other is not
# detected by anything visible in this patch.
# NOTE(review): in this copy the hunk bodies appear to have had their newlines
# flattened into spaces (hunk headers and their content share a line). Restore the
# original line structure before applying; do not hand-edit context or `-` lines,
# since they must match the target files exactly.
diff --git a/compose.production.yaml b/compose.production.yaml index 3056bdca5ca..9df7c18585e 100644 --- a/compose.production.yaml +++ b/compose.production.yaml @@ -73,6 +73,8 @@ services: - ../olsystem:/olsystem - /1:/1 deploy: + # Note: the replicas here must be kept in sync with the `upstream covers_backend` + # value in `docker/covers_nginx.conf`. replicas: 2 covers_nginx: diff --git a/docker/covers_nginx.conf b/docker/covers_nginx.conf index 0d6ec59e2cc..33f6c943933 100644 --- a/docker/covers_nginx.conf +++ b/docker/covers_nginx.conf @@ -16,6 +16,15 @@ server { ssl_prefer_server_ciphers on; } +# Docker's internal load balancing ends up with unbalanced connections eventually. +# This must be kept in sync with the `replicas` value in `compose.production.yaml` +# for the `covers` service. +upstream covers_backend { + least_conn; + server openlibrary-covers-1:7075; + server openlibrary-covers-2:7075; +} + server { listen 80; listen 443; @@ -25,8 +34,15 @@ server { keepalive_timeout 5; + # Return 429 errors as JSON. + error_page 429 = @429; + location @429 { + default_type application/json; + return 429 '{"status": 429, "message": "Too Many Requests. Please email us at info@archive.org"}'; + } + location / { - proxy_pass http://covers:7075; + proxy_pass http://covers_backend; proxy_set_header Host $http_host; # Gunicorn takes IP from this header @@ -37,8 +53,17 @@ server { proxy_set_header X-Scheme $scheme; if ($http_user_agent ~ (Bytespider) ) { - return 429; + return 444; } + + if ($http_user_agent ~ (CloudFront) ) { + return 444; + } + + + # Covers rate limit. 
+ limit_req zone=cover_limit burst=400 nodelay; + limit_req_status 429; } location ^~ /.well-known/acme-challenge/ { diff --git a/docker/nginx.conf b/docker/nginx.conf index 406278a561a..7a4e8d13af5 100644 --- a/docker/nginx.conf +++ b/docker/nginx.conf @@ -11,7 +11,7 @@ error_log /var/log/nginx/error.log; pid /var/run/nginx.pid; events { - worker_connections 1024; + worker_connections 2048; # multi_accept on; } @@ -44,6 +44,25 @@ http { # Black-listed IPs include /olsystem/etc/nginx/deny.conf; + # Rate limiting: https://nginx.org/en/docs/http/ngx_http_limit_req_module.html + # No rate limit when IP obfuscation is not applied, as every IP is 255.0.0.0. + # These rules only do anything if invoked, e.g., in web_nginx.conf. + # TLDR: these rules can be disabled in `docker/web_nginx.conf` + # and `docker/covers_nginx.conf`. + geo $should_apply_limit { + 255.0.0.0 0; + default 1; + } + + map $should_apply_limit $rate_limit_key { + 0 ''; + 1 $binary_remote_addr; + } + + limit_req_zone $rate_limit_key zone=web_limit:10m rate=200r/m; + # Set a more permissive limit for covers because some pages might load 20+ covers. + limit_req_zone $rate_limit_key zone=cover_limit:10m rate=400r/m; + # Things are mounted into here by the docker compose file include /etc/nginx/sites-enabled/*; } diff --git a/docker/web_nginx.conf b/docker/web_nginx.conf index 960670eb739..654ee5168a5 100644 --- a/docker/web_nginx.conf +++ b/docker/web_nginx.conf @@ -64,6 +64,14 @@ server { if ($api_call = "http:noapi") { rewrite ^(.*)$ https://$http_host$1 last; } + + # Return 429 errors as JSON. + error_page 429 = @429; + location @429 { + default_type application/json; + return 429 '{"status": 429, "message": "Too Many Requests. 
Consider using https://openlibrary.org/developers/dumps."}'; + } + location / { proxy_pass http://webnodes; proxy_set_header Host $http_host; @@ -76,8 +84,13 @@ server { proxy_set_header X-Scheme $scheme; if ($http_user_agent ~ (Bytespider) ) { - return 429; + return 444; } + + + # Web rate limit. + limit_req zone=web_limit burst=200 nodelay; + limit_req_status 429; } location ^~ /.well-known/acme-challenge/ {