From a6cf6b204b9581db5b0bb7353ba69dd237caabe8 Mon Sep 17 00:00:00 2001
From: fiskhandlarn
Date: Tue, 25 Nov 2025 16:47:04 +0100
Subject: [PATCH] test: update test nginx conf

---
Notes (review commentary; text between the "---" line and the diff is not
part of the commit):
  - The fixture now routes the decision through a $block flag: it starts at
    0, is set to 1 when the User-Agent matches the bot list
    (case-insensitively, via ~*), is reset to 0 when $request_uri equals
    "/robots.txt", and a 403 is returned only if the flag is still set.
  - Net effect: matching user agents can still fetch /robots.txt; any other
    request from them is answered with 403.
  - NOTE(review): $request_uri carries the query string, so
    "/robots.txt?x=1" would not hit the exemption -- confirm this is
    intended.
  - NOTE(review): this file lives under test_files, so it presumably has to
    mirror generated output byte-for-byte; verify before hand-editing.

 code/test_files/nginx-block-ai-bots.conf | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/code/test_files/nginx-block-ai-bots.conf b/code/test_files/nginx-block-ai-bots.conf
index c569b15..d5e3cc6 100644
--- a/code/test_files/nginx-block-ai-bots.conf
+++ b/code/test_files/nginx-block-ai-bots.conf
@@ -1,3 +1,13 @@
+set $block 0;
+
 if ($http_user_agent ~* "(AI2Bot|Ai2Bot\-Dolma|Amazonbot|anthropic\-ai|Applebot|Applebot\-Extended|Bytespider|CCBot|ChatGPT\-User|Claude\-Web|ClaudeBot|cohere\-ai|Diffbot|FacebookBot|facebookexternalhit|FriendlyCrawler|Google\-Extended|GoogleOther|GoogleOther\-Image|GoogleOther\-Video|GPTBot|iaskspider/2\.0|ICC\-Crawler|ImagesiftBot|img2dataset|ISSCyberRiskCrawler|Kangaroo\ Bot|Meta\-ExternalAgent|Meta\-ExternalFetcher|OAI\-SearchBot|omgili|omgilibot|Perplexity\-User|PerplexityBot|PetalBot|Scrapy|Sidetrade\ indexer\ bot|Timpibot|VelenPublicWebCrawler|Webzio\-Extended|YouBot|crawler\.with\.dots|star\*\*\*crawler|Is\ this\ a\ crawler\?|a\[mazing\]\{42\}\(robot\)|2\^32\$|curl\|sudo\ bash)") {
+    set $block 1;
+}
+
+if ($request_uri = "/robots.txt") {
+    set $block 0;
+}
+
+if ($block) {
     return 403;
 }
\ No newline at end of file