From 8363d4fdd404c03af92115c8b9115952e47749ec Mon Sep 17 00:00:00 2001
From: dark-visitors <dark-visitors@users.noreply.github.com>
Date: Tue, 2 Dec 2025 01:25:24 +0000
Subject: [PATCH] Update from Dark Visitors

---
 robots.json | 150 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 124 insertions(+), 26 deletions(-)

diff --git a/robots.json b/robots.json
index 3b0bc85..50aadc3 100644
--- a/robots.json
+++ b/robots.json
@@ -13,12 +13,19 @@
         "frequency": "No information provided.",
         "description": "Explores 'certain domains' to find web content."
     },
+    "AI2Bot-DeepResearchEval": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Assistants",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/ai2bot-deepresearcheval"
+    },
     "Ai2Bot-Dolma": {
-        "description": "Explores 'certain domains' to find web content.",
-        "frequency": "No information provided.",
-        "function": "Content is used to train open language models.",
         "operator": "[Ai2](https://allenai.org/crawler)",
-        "respect": "Yes"
+        "respect": "Yes",
+        "function": "Content is used to train open language models.",
+        "frequency": "No information provided.",
+        "description": "Explores 'certain domains' to find web content."
     },
     "aiHitBot": {
         "operator": "[aiHit](https://www.aihitdata.com/about)",
@@ -27,20 +34,6 @@
         "frequency": "No information provided.",
         "description": "Scrapes data for AI systems."
     },
-    "AmazonBuyForMe": {
-        "operator": "[Amazon](https://amazon.com)",
-        "respect": "Unclear at this time.",
-        "function": "AI Agents",
-        "frequency": "No information provided.",
-        "description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
-    },
-    "atlassian-bot": {
-        "operator": "[Atlassian](https://www.atlassian.com)",
-        "respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
-        "function": "AI search, assistants and agents",
-        "frequency": "No information provided.",
-        "description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
-    },
     "amazon-kendra": {
         "operator": "Amazon",
         "respect": "Yes",
@@ -55,6 +48,13 @@
         "frequency": "No information provided.",
         "description": "Includes references to crawled website when surfacing answers via Alexa; does not clearly outline other uses."
     },
+    "AmazonBuyForMe": {
+        "operator": "[Amazon](https://amazon.com)",
+        "respect": "Unclear at this time.",
+        "function": "AI Agents",
+        "frequency": "No information provided.",
+        "description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
+    },
     "Andibot": {
         "operator": "[Andi](https://andisearch.com/)",
         "respect": "Unclear at this time",
@@ -90,6 +90,13 @@
         "frequency": "Unclear at this time.",
         "description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
     },
+    "atlassian-bot": {
+        "operator": "[Atlassian](https://www.atlassian.com)",
+        "respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
+        "function": "AI search, assistants and agents",
+        "frequency": "No information provided.",
+        "description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
+    },
     "Awario": {
         "operator": "Awario",
         "respect": "Unclear at this time.",
@@ -146,6 +153,13 @@
         "frequency": "Monthly at present.",
         "description": "Web archive going back to 2008. [Cited in thousands of research papers per year](https://commoncrawl.org/research-papers)."
     },
+    "ChatGLM-Spider": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Data Scrapers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/chatglm-spider"
+    },
     "ChatGPT Agent": {
         "operator": "[OpenAI](https://openai.com)",
         "respect": "Yes",
@@ -384,6 +398,20 @@
         "frequency": "No information.",
         "description": "Data is used to train current and future models, removed paywalled data, PII and data that violates the company's policies."
     },
+    "iAskBot": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "Undocumented AI Agents",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/iaskbot"
+    },
+    "iaskspider": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "Undocumented AI Agents",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/iaskspider"
+    },
     "iaskspider/2.0": {
         "description": "Used to provide answers to user queries.",
         "frequency": "Unclear at this time.",
@@ -412,6 +440,13 @@
         "operator": "[ImageSift](https://imagesift.com)",
         "respect": "[Yes](https://imagesift.com/about)"
     },
+    "imageSpider": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Data Scrapers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/imagespider"
+    },
     "img2dataset": {
         "description": "Downloads large sets of images into datasets for LLM training or other purposes.",
         "frequency": "At the discretion of img2dataset users.",
@@ -440,6 +475,20 @@
         "frequency": "Indexes based on 'change signals' and user configuration.",
         "description": "Indexes content to tailor AI experiences, generate content, answers and recommendations."
     },
+    "KunatoCrawler": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "Undocumented AI Agents",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/kunatocrawler"
+    },
+    "laion-huggingface-processor": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Data Scrapers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/laion-huggingface-processor"
+    },
     "LAIONDownloader": {
         "operator": "[Large-scale Artificial Intelligence Open Network](https://laion.ai/)",
         "respect": "[No](https://laion.ai/faq/)",
@@ -447,6 +496,13 @@
         "frequency": "Unclear at this time.",
         "description": "LAIONDownloader is a bot by LAION, a non-profit organization that provides datasets, tools and models to liberate machine learning research."
     },
+    "LCC": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Data Scrapers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/lcc"
+    },
     "LinerBot": {
         "operator": "Unclear at this time.",
         "respect": "Unclear at this time.",
@@ -461,6 +517,13 @@
         "frequency": "Unclear at this time.",
         "description": "Linguee Bot is a web crawler used by Linguee to gather training data for its AI powered translation service."
     },
+    "LinkupBot": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Search Crawlers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/linkupbot"
+    },
     "meta-externalagent": {
         "operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)",
         "respect": "Yes",
@@ -622,6 +685,13 @@
         "operator": "[phind](https://www.phind.com/)",
         "respect": "Unclear at this time."
     },
+    "Poggio-Citations": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Assistants",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/poggio-citations"
+    },
     "Poseidon Research Crawler": {
         "operator": "[Poseidon Research](https://www.poseidonresearch.com)",
         "description": "Lab focused on scaling the interpretability research necessary to make better AI systems possible.",
@@ -651,11 +721,11 @@
         "respect": "Unclear at this time."
     },
     "SBIntuitionsBot": {
-        "description": "AI development and information analysis",
+        "operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)",
         "respect": "[Yes](https://www.sbintuitions.co.jp/en/bot/)",
-        "frequency": "No information.",
         "function": "Uses data gathered in AI development and information analysis.",
-        "operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)"
+        "frequency": "No information.",
+        "description": "AI development and information analysis"
     },
     "Scrapy": {
         "description": "\"AI and machine learning applications often need large amounts of quality data, and web data extraction is a fast, efficient way to build structured data sets.\"",
@@ -692,6 +762,13 @@
         "operator": "[Sidetrade](https://www.sidetrade.com)",
         "respect": "Unclear at this time."
     },
+    "Spider": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Data Scrapers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/spider"
+    },
     "TerraCotta": {
         "operator": "[Ceramic AI](https://ceramic.ai/)",
         "respect": "[Yes](https://github.com/CeramicTeam/CeramicTerracotta)",
@@ -721,11 +798,11 @@
         "description": "Makes data available for training AI models."
     },
     "VelenPublicWebCrawler": {
-        "description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\"",
-        "frequency": "No information.",
-        "function": "Scrapes data for business data sets and machine learning models.",
         "operator": "[Velen Crawler](https://velen.io)",
-        "respect": "[Yes](https://velen.io)"
+        "respect": "[Yes](https://velen.io)",
+        "function": "Scrapes data for business data sets and machine learning models.",
+        "frequency": "No information.",
+        "description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\""
     },
     "WARDBot": {
         "operator": "WEBSPARK",
@@ -741,6 +818,13 @@
         "frequency": "Unclear at this time.",
         "description": "Webzio-Extended is a web crawler used by Webz.io to maintain a repository of web crawl data that it sells to other companies, including those using it to train AI models. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
     },
+    "webzio-extended": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Data Scrapers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
+    },
     "wpbot": {
         "operator": "[QuantumCloud](https://www.quantumcloud.com)",
         "respect": "Unclear at this time; opt out provided via [Google Form](https://forms.gle/ajBaxygz9jSR8p8G9)",
@@ -748,6 +832,13 @@
         "frequency": "Unclear at this time.",
         "description": "wpbot is a used to support the functionality of the AI Chatbot for WordPress plugin. It supports the use of customer models, data collection and customer support."
     },
+    "WRTNBot": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "Undocumented AI Agents",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/wrtnbot"
+    },
     "YaK": {
         "operator": "[Meltwater](https://www.meltwater.com/en/suite/consumer-intelligence)",
         "respect": "Unclear at this time.",
@@ -775,5 +866,12 @@
         "function": "Scrapes data for search engine and LLMs.",
         "frequency": "No information.",
         "description": "Retrieves data used for You.com web search engine and LLMs."
+    },
+    "ZanistaBot": {
+        "operator": "Unclear at this time.",
+        "respect": "Unclear at this time.",
+        "function": "AI Search Crawlers",
+        "frequency": "Unclear at this time.",
+        "description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/zanistabot"
     }
-}
+}
\ No newline at end of file