mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-12-29 12:18:33 +01:00
Update from Dark Visitors
This commit is contained in:
parent
4ed17b8e4a
commit
7eb2099d3f
1 changed files with 54 additions and 12 deletions
66
robots.json
66
robots.json
|
|
@ -55,6 +55,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
|
||||
},
|
||||
"Awario": {
|
||||
"operator": "Awario",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Awario is an AI data scraper operated by Awario. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/awario"
|
||||
},
|
||||
"bedrockbot": {
|
||||
"operator": "[Amazon](https://amazon.com)",
|
||||
"respect": "[Yes](https://docs.aws.amazon.com/bedrock/latest/userguide/webcrawl-data-source-connector.html#configuration-webcrawl-connector)",
|
||||
|
|
@ -146,6 +153,20 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "Provides crawling services for any purpose, probably including AI model training."
|
||||
},
|
||||
"Datenbank Crawler": {
|
||||
"operator": "Datenbank",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Datenbank Crawler is an AI data scraper operated by Datenbank. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/datenbank-crawler"
|
||||
},
|
||||
"Devin": {
|
||||
"operator": "Devin AI",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Assistants",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Devin is an AI assistant operated by Devin AI. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/devin"
|
||||
},
|
||||
"Diffbot": {
|
||||
"operator": "[Diffbot](https://www.diffbot.com/)",
|
||||
"respect": "At the discretion of Diffbot users.",
|
||||
|
|
@ -160,6 +181,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "DuckAssistBot is used by DuckDuckGo's DuckAssist feature to fetch content and generate realtime AI answers to user searches. More info can be found at https://darkvisitors.com/agents/agents/duckassistbot"
|
||||
},
|
||||
"Echobot Bot": {
|
||||
"operator": "Echobox",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Echobot Bot is an AI data scraper operated by Echobox. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/echobot-bot"
|
||||
},
|
||||
"EchoboxBot": {
|
||||
"operator": "[Echobox](https://echobox.com)",
|
||||
"respect": "Unclear at this time.",
|
||||
|
|
@ -252,11 +280,11 @@
|
|||
"respect": "No"
|
||||
},
|
||||
"ICC-Crawler": {
|
||||
"description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business.",
|
||||
"frequency": "No information.",
|
||||
"function": "Scrapes data to train and support AI technologies.",
|
||||
"operator": "[NICT](https://nict.go.jp)",
|
||||
"respect": "Yes"
|
||||
"respect": "Yes",
|
||||
"function": "Scrapes data to train and support AI technologies.",
|
||||
"frequency": "No information.",
|
||||
"description": "Use the collected data for artificial intelligence technologies; provide data to third parties, including commercial companies; those companies can use the data for their own business."
|
||||
},
|
||||
"ImagesiftBot": {
|
||||
"description": "Once images and text are downloaded from a webpage, ImageSift analyzes this data from the page and stores the information in an index. Our web intelligence products use this index to enable search and retrieval of similar images.",
|
||||
|
|
@ -314,6 +342,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
|
||||
},
|
||||
"MistralAI-User": {
|
||||
"operator": "Mistral",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Assistants",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "MistralAI-User is an AI assistant operated by Mistral. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/mistralai-user"
|
||||
},
|
||||
"MistralAI-User/1.0": {
|
||||
"operator": "Mistral AI",
|
||||
"function": "Takes action based on user prompts.",
|
||||
|
|
@ -399,11 +434,11 @@
|
|||
"description": "Crawls sites to surface as results in Perplexity."
|
||||
},
|
||||
"PetalBot": {
|
||||
"description": "Operated by Huawei to provide search and AI assistant services.",
|
||||
"frequency": "No explicit frequency provided.",
|
||||
"function": "Used to provide recommendations in Huawei assistant and AI search services.",
|
||||
"operator": "[Huawei](https://huawei.com/)",
|
||||
"respect": "Yes"
|
||||
"respect": "Yes",
|
||||
"function": "Used to provide recommendations in Huawei assistant and AI search services.",
|
||||
"frequency": "No explicit frequency provided.",
|
||||
"description": "Operated by Huawei to provide search and AI assistant services."
|
||||
},
|
||||
"PhindBot": {
|
||||
"description": "Company offers an AI agent that uses AI to generate extra web queries on the fly",
|
||||
|
|
@ -420,11 +455,11 @@
|
|||
"respect": "Unclear at this time."
|
||||
},
|
||||
"QualifiedBot": {
|
||||
"description": "Operated by Qualified as part of their suite of AI product offerings.",
|
||||
"frequency": "No explicit frequency provided.",
|
||||
"function": "Company offers AI agents and other related products; usage can be assumed to support said products.",
|
||||
"operator": "[Qualified](https://www.qualified.com)",
|
||||
"respect": "Unclear at this time."
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "Company offers AI agents and other related products; usage can be assumed to support said products.",
|
||||
"frequency": "No explicit frequency provided.",
|
||||
"description": "Operated by Qualified as part of their suite of AI product offerings."
|
||||
},
|
||||
"QuillBot": {
|
||||
"description": "Operated by QuillBot as part of their suite of AI product offerings.",
|
||||
|
|
@ -524,6 +559,13 @@
|
|||
"operator": "[Velen Crawler](https://velen.io)",
|
||||
"respect": "[Yes](https://velen.io)"
|
||||
},
|
||||
"WARDBot": {
|
||||
"operator": "WEBSPARK",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "WARDBot is an AI data scraper operated by WEBSPARK. It's not currently known to be artificially intelligent or AI-related. If you think that's incorrect or can provide more detail about its purpose, please contact us. More info can be found at https://darkvisitors.com/agents/agents/wardbot"
|
||||
},
|
||||
"Webzio-Extended": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue