Containerised nginx for the blog: Compose service, main nginx.conf with an
AI-crawler User-Agent map, the default server block, and rate-limit zones.

Review changes relative to the original patch:
  * compose.yaml: port mapping quoted; file now ends with a newline.
  * nginx/default.conf: root/index hoisted to server level so that
    "location = /robots.txt" resolves files under /usr/share/nginx/html.
  * nginx.conf, nginx/rates.conf: explanatory comments only.

NOTE(review): the "index" blob hashes below are carried over from the
original patch and no longer match the edited contents; regenerate the
patch with "git diff" if they are needed (e.g. for 3-way apply).

diff --git a/compose.yaml b/compose.yaml
new file mode 100644
index 0000000..8acf569
--- /dev/null
+++ b/compose.yaml
@@ -0,0 +1,12 @@
+services:
+  blog:
+    image: git.toomuchtaco.net/taco/taco-blog:latest
+    restart: 'unless-stopped'
+    ports:
+      # Host port 1313 -> container port 80. Quoted so YAML never applies
+      # implicit numeric typing to a HOST:CONTAINER mapping.
+      - "1313:80"
+    volumes:
+      # Read-only bind mounts supplying the nginx configuration.
+      - ./nginx:/etc/nginx/conf.d:ro
+      - ./nginx.conf:/etc/nginx/nginx.conf:ro
diff --git a/nginx.conf b/nginx.conf
new file mode 100644
index 0000000..5d56476
--- /dev/null
+++ b/nginx.conf
@@ -0,0 +1,61 @@
+user nginx;
+worker_processes auto;
+
+error_log /var/log/nginx/error.log notice;
+pid /var/run/nginx.pid;
+
+
+events {
+    worker_connections 1024;
+}
+
+
+http {
+    include /etc/nginx/mime.types;
+    default_type application/octet-stream;
+
+    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
+                    '$status $body_bytes_sent "$http_referer" '
+                    '"$http_user_agent" "$http_x_forwarded_for"';
+
+    access_log /var/log/nginx/access.log main;
+
+    sendfile on;
+    server_tokens off;
+
+    keepalive_timeout 65;
+
+    # ── AI bot User-Agent map ──────────────────────────────────────────
+    # Must be inside http {}, NOT inside server {} or location {}
+    # Sets $bad_bot to 1 when the User-Agent header matches any pattern
+    # below ("~*" = case-insensitive regex); nginx/default.conf answers
+    # such requests with 403.
+    #
+    # NOTE(review): "Google-Extended" is documented by Google as a
+    # robots.txt control token rather than a User-Agent string, so that
+    # entry is not expected to match real requests — confirm, and rely on
+    # robots.txt for it.
+    # NOTE(review): "~*Applebot" also matches Apple's regular search
+    # crawler, not only AI-related traffic — confirm that is intended.
+    map $http_user_agent $bad_bot {
+        default 0; # allow everything by default
+        ~*GPTBot 1;
+        ~*ChatGPT-User 1;
+        ~*ClaudeBot 1;
+        ~*Claude-Web 1;
+        ~*anthropic-ai 1;
+        ~*CCBot 1;
+        ~*Google-Extended 1;
+        ~*PerplexityBot 1;
+        ~*Amazonbot 1;
+        ~*Bytespider 1;
+        ~*YouBot 1;
+        ~*Applebot 1;
+        ~*DuckAssistBot 1;
+        ~*meta-externalagent 1;
+        ~*MistralAI-Spider 1;
+        ~*oai-searchbot 1;
+    }
+
+    include /etc/nginx/conf.d/*.conf;
+}
diff --git a/nginx/default.conf b/nginx/default.conf
new file mode 100644
index 0000000..d782b47
--- /dev/null
+++ b/nginx/default.conf
@@ -0,0 +1,40 @@
+server {
+    listen 80;
+    listen [::]:80;
+    server_name localhost;
+
+    # Document root is declared once at server level so every location
+    # inherits it. When it lived only inside "location /", the exact-match
+    # robots.txt location fell back to nginx's default root ("html",
+    # resolved against the nginx prefix) and try_files looked in the wrong
+    # directory.
+    root /usr/share/nginx/html;
+    index index.html index.htm;
+
+    # "always" adds the header to every response code, including the 403
+    # returned to blocked bots.
+    add_header X-Robots-Tag "noai, noimageai" always;
+
+    # ── robots.txt — exempt from bot blocking ──────────────────────────
+    # Exact-match location: the $bad_bot check in "location /" is not
+    # evaluated for this URI.
+    location = /robots.txt {
+        try_files $uri =404;
+        access_log off;
+        log_not_found off;
+    }
+
+    # ── Block known AI bots ────────────────────────────────────────────
+    # "if" is safe here — we're only returning a status, not using
+    # proxy_pass, rewrite, or other directives that interact poorly with if
+    location / {
+        if ($bad_bot) {
+            return 403 "Forbidden";
+        }
+    }
+
+    # Serve the static page /50x.html for server error responses.
+    error_page 500 502 503 504 /50x.html;
+    location = /50x.html {
+        # Files come from the server-level root.
+    }
+}
diff --git a/nginx/rates.conf b/nginx/rates.conf
new file mode 100644
index 0000000..285d120
--- /dev/null
+++ b/nginx/rates.conf
@@ -0,0 +1,13 @@
+# Shared-memory zones for request rate limiting, keyed by client address.
+# NOTE(review): these directives only declare the zones. Nothing in this
+# patch applies them with "limit_req zone=...", so no limiting is in
+# effect until a server or location block does so.
+
+# General request limit per IP
+limit_req_zone $binary_remote_addr zone=general:10m rate=30r/s;
+
+# Static asset scraping control
+limit_req_zone $binary_remote_addr zone=static:10m rate=10r/s;
+
+# API abuse protection
+limit_req_zone $binary_remote_addr zone=api:10m rate=5r/s;