Update from Dark Visitors

This commit is contained in:
dark-visitors 2025-12-02 01:25:24 +00:00
commit 8363d4fdd4

View file

@ -13,12 +13,19 @@
"frequency": "No information provided.",
"description": "Explores 'certain domains' to find web content."
},
"AI2Bot-DeepResearchEval": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Assistants",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/ai2bot-deepresearcheval"
},
"Ai2Bot-Dolma": {
"description": "Explores 'certain domains' to find web content.",
"frequency": "No information provided.",
"function": "Content is used to train open language models.",
"operator": "[Ai2](https://allenai.org/crawler)",
"respect": "Yes"
"respect": "Yes",
"function": "Content is used to train open language models.",
"frequency": "No information provided.",
"description": "Explores 'certain domains' to find web content."
},
"aiHitBot": {
"operator": "[aiHit](https://www.aihitdata.com/about)",
@ -27,20 +34,6 @@
"frequency": "No information provided.",
"description": "Scrapes data for AI systems."
},
"AmazonBuyForMe": {
"operator": "[Amazon](https://amazon.com)",
"respect": "Unclear at this time.",
"function": "AI Agents",
"frequency": "No information provided.",
"description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
},
"atlassian-bot": {
"operator": "[Atlassian](https://www.atlassian.com)",
"respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
"function": "AI search, assistants and agents",
"frequency": "No information provided.",
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
},
"amazon-kendra": {
"operator": "Amazon",
"respect": "Yes",
@ -55,6 +48,13 @@
"frequency": "No information provided.",
"description": "Includes references to crawled website when surfacing answers via Alexa; does not clearly outline other uses."
},
"AmazonBuyForMe": {
"operator": "[Amazon](https://amazon.com)",
"respect": "Unclear at this time.",
"function": "AI Agents",
"frequency": "No information provided.",
"description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
},
"Andibot": {
"operator": "[Andi](https://andisearch.com/)",
"respect": "Unclear at this time.",
@ -90,6 +90,13 @@
"frequency": "Unclear at this time.",
"description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
},
"atlassian-bot": {
"operator": "[Atlassian](https://www.atlassian.com)",
"respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
"function": "AI search, assistants and agents",
"frequency": "No information provided.",
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
},
"Awario": {
"operator": "Awario",
"respect": "Unclear at this time.",
@ -146,6 +153,13 @@
"frequency": "Monthly at present.",
"description": "Web archive going back to 2008. [Cited in thousands of research papers per year](https://commoncrawl.org/research-papers)."
},
"ChatGLM-Spider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/chatglm-spider"
},
"ChatGPT Agent": {
"operator": "[OpenAI](https://openai.com)",
"respect": "Yes",
@ -384,6 +398,20 @@
"frequency": "No information.",
"description": "Data is used to train current and future models, removed paywalled data, PII and data that violates the company's policies."
},
"iAskBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/iaskbot"
},
"iaskspider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/iaskspider"
},
"iaskspider/2.0": {
"description": "Used to provide answers to user queries.",
"frequency": "Unclear at this time.",
@ -412,6 +440,13 @@
"operator": "[ImageSift](https://imagesift.com)",
"respect": "[Yes](https://imagesift.com/about)"
},
"imageSpider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/imagespider"
},
"img2dataset": {
"description": "Downloads large sets of images into datasets for LLM training or other purposes.",
"frequency": "At the discretion of img2dataset users.",
@ -440,6 +475,20 @@
"frequency": "Indexes based on 'change signals' and user configuration.",
"description": "Indexes content to tailor AI experiences, generate content, answers and recommendations."
},
"KunatoCrawler": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/kunatocrawler"
},
"laion-huggingface-processor": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/laion-huggingface-processor"
},
"LAIONDownloader": {
"operator": "[Large-scale Artificial Intelligence Open Network](https://laion.ai/)",
"respect": "[No](https://laion.ai/faq/)",
@ -447,6 +496,13 @@
"frequency": "Unclear at this time.",
"description": "LAIONDownloader is a bot by LAION, a non-profit organization that provides datasets, tools and models to liberate machine learning research."
},
"LCC": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/lcc"
},
"LinerBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
@ -461,6 +517,13 @@
"frequency": "Unclear at this time.",
"description": "Linguee Bot is a web crawler used by Linguee to gather training data for its AI powered translation service."
},
"LinkupBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Search Crawlers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/linkupbot"
},
"meta-externalagent": {
"operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)",
"respect": "Yes",
@ -622,6 +685,13 @@
"operator": "[phind](https://www.phind.com/)",
"respect": "Unclear at this time."
},
"Poggio-Citations": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Assistants",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/poggio-citations"
},
"Poseidon Research Crawler": {
"operator": "[Poseidon Research](https://www.poseidonresearch.com)",
"description": "Lab focused on scaling the interpretability research necessary to make better AI systems possible.",
@ -651,11 +721,11 @@
"respect": "Unclear at this time."
},
"SBIntuitionsBot": {
"description": "AI development and information analysis",
"operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)",
"respect": "[Yes](https://www.sbintuitions.co.jp/en/bot/)",
"frequency": "No information.",
"function": "Uses data gathered in AI development and information analysis.",
"operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)"
"frequency": "No information.",
"description": "AI development and information analysis"
},
"Scrapy": {
"description": "\"AI and machine learning applications often need large amounts of quality data, and web data extraction is a fast, efficient way to build structured data sets.\"",
@ -692,6 +762,13 @@
"operator": "[Sidetrade](https://www.sidetrade.com)",
"respect": "Unclear at this time."
},
"Spider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/spider"
},
"TerraCotta": {
"operator": "[Ceramic AI](https://ceramic.ai/)",
"respect": "[Yes](https://github.com/CeramicTeam/CeramicTerracotta)",
@ -721,11 +798,11 @@
"description": "Makes data available for training AI models."
},
"VelenPublicWebCrawler": {
"description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\"",
"frequency": "No information.",
"function": "Scrapes data for business data sets and machine learning models.",
"operator": "[Velen Crawler](https://velen.io)",
"respect": "[Yes](https://velen.io)"
"respect": "[Yes](https://velen.io)",
"function": "Scrapes data for business data sets and machine learning models.",
"frequency": "No information.",
"description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\""
},
"WARDBot": {
"operator": "WEBSPARK",
@ -741,6 +818,13 @@
"frequency": "Unclear at this time.",
"description": "Webzio-Extended is a web crawler used by Webz.io to maintain a repository of web crawl data that it sells to other companies, including those using it to train AI models. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
},
"webzio-extended": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
},
"wpbot": {
"operator": "[QuantumCloud](https://www.quantumcloud.com)",
"respect": "Unclear at this time; opt out provided via [Google Form](https://forms.gle/ajBaxygz9jSR8p8G9)",
@ -748,6 +832,13 @@
"frequency": "Unclear at this time.",
"description": "wpbot is used to support the functionality of the AI Chatbot for WordPress plugin. It supports the use of customer models, data collection and customer support."
},
"WRTNBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/wrtnbot"
},
"YaK": {
"operator": "[Meltwater](https://www.meltwater.com/en/suite/consumer-intelligence)",
"respect": "Unclear at this time.",
@ -775,5 +866,12 @@
"function": "Scrapes data for search engine and LLMs.",
"frequency": "No information.",
"description": "Retrieves data used for You.com web search engine and LLMs."
},
"ZanistaBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Search Crawlers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com. More info can be found at https://darkvisitors.com/agents/agents/zanistabot"
}
}
}