mirror of
https://github.com/ai-robots-txt/ai.robots.txt.git
synced 2025-12-29 12:18:33 +01:00
Update from Dark Visitors
This commit is contained in:
parent
2ccd443581
commit
8363d4fdd4
1 changed files with 124 additions and 26 deletions
150
robots.json
150
robots.json
|
|
@ -13,12 +13,19 @@
|
|||
"frequency": "No information provided.",
|
||||
"description": "Explores 'certain domains' to find web content."
|
||||
},
|
||||
"AI2Bot-DeepResearchEval": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Assistants",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/ai2bot-deepresearcheval"
|
||||
},
|
||||
"Ai2Bot-Dolma": {
|
||||
"description": "Explores 'certain domains' to find web content.",
|
||||
"frequency": "No information provided.",
|
||||
"function": "Content is used to train open language models.",
|
||||
"operator": "[Ai2](https://allenai.org/crawler)",
|
||||
"respect": "Yes"
|
||||
"respect": "Yes",
|
||||
"function": "Content is used to train open language models.",
|
||||
"frequency": "No information provided.",
|
||||
"description": "Explores 'certain domains' to find web content."
|
||||
},
|
||||
"aiHitBot": {
|
||||
"operator": "[aiHit](https://www.aihitdata.com/about)",
|
||||
|
|
@ -27,20 +34,6 @@
|
|||
"frequency": "No information provided.",
|
||||
"description": "Scrapes data for AI systems."
|
||||
},
|
||||
"AmazonBuyForMe": {
|
||||
"operator": "[Amazon](https://amazon.com)",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Agents",
|
||||
"frequency": "No information provided.",
|
||||
"description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
|
||||
},
|
||||
"atlassian-bot": {
|
||||
"operator": "[Atlassian](https://www.atlassian.com)",
|
||||
"respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
|
||||
"function": "AI search, assistants and agents",
|
||||
"frequency": "No information provided.",
|
||||
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
|
||||
},
|
||||
"amazon-kendra": {
|
||||
"operator": "Amazon",
|
||||
"respect": "Yes",
|
||||
|
|
@ -55,6 +48,13 @@
|
|||
"frequency": "No information provided.",
|
||||
"description": "Includes references to crawled website when surfacing answers via Alexa; does not clearly outline other uses."
|
||||
},
|
||||
"AmazonBuyForMe": {
|
||||
"operator": "[Amazon](https://amazon.com)",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Agents",
|
||||
"frequency": "No information provided.",
|
||||
"description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
|
||||
},
|
||||
"Andibot": {
|
||||
"operator": "[Andi](https://andisearch.com/)",
|
||||
"respect": "Unclear at this time",
|
||||
|
|
@ -90,6 +90,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
|
||||
},
|
||||
"atlassian-bot": {
|
||||
"operator": "[Atlassian](https://www.atlassian.com)",
|
||||
"respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
|
||||
"function": "AI search, assistants and agents",
|
||||
"frequency": "No information provided.",
|
||||
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
|
||||
},
|
||||
"Awario": {
|
||||
"operator": "Awario",
|
||||
"respect": "Unclear at this time.",
|
||||
|
|
@ -146,6 +153,13 @@
|
|||
"frequency": "Monthly at present.",
|
||||
"description": "Web archive going back to 2008. [Cited in thousands of research papers per year](https://commoncrawl.org/research-papers)."
|
||||
},
|
||||
"ChatGLM-Spider": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/chatglm-spider"
|
||||
},
|
||||
"ChatGPT Agent": {
|
||||
"operator": "[OpenAI](https://openai.com)",
|
||||
"respect": "Yes",
|
||||
|
|
@ -384,6 +398,20 @@
|
|||
"frequency": "No information.",
|
||||
"description": "Data is used to train current and future models, removed paywalled data, PII and data that violates the company's policies."
|
||||
},
|
||||
"iAskBot": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "Undocumented AI Agents",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/iaskbot"
|
||||
},
|
||||
"iaskspider": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "Undocumented AI Agents",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/iaskspider"
|
||||
},
|
||||
"iaskspider/2.0": {
|
||||
"description": "Used to provide answers to user queries.",
|
||||
"frequency": "Unclear at this time.",
|
||||
|
|
@ -412,6 +440,13 @@
|
|||
"operator": "[ImageSift](https://imagesift.com)",
|
||||
"respect": "[Yes](https://imagesift.com/about)"
|
||||
},
|
||||
"imageSpider": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/imagespider"
|
||||
},
|
||||
"img2dataset": {
|
||||
"description": "Downloads large sets of images into datasets for LLM training or other purposes.",
|
||||
"frequency": "At the discretion of img2dataset users.",
|
||||
|
|
@ -440,6 +475,20 @@
|
|||
"frequency": "Indexes based on 'change signals' and user configuration.",
|
||||
"description": "Indexes content to tailor AI experiences, generate content, answers and recommendations."
|
||||
},
|
||||
"KunatoCrawler": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "Undocumented AI Agents",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/kunatocrawler"
|
||||
},
|
||||
"laion-huggingface-processor": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/laion-huggingface-processor"
|
||||
},
|
||||
"LAIONDownloader": {
|
||||
"operator": "[Large-scale Artificial Intelligence Open Network](https://laion.ai/)",
|
||||
"respect": "[No](https://laion.ai/faq/)",
|
||||
|
|
@ -447,6 +496,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "LAIONDownloader is a bot by LAION, a non-profit organization that provides datasets, tools and models to liberate machine learning research."
|
||||
},
|
||||
"LCC": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/lcc"
|
||||
},
|
||||
"LinerBot": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
|
|
@ -461,6 +517,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "Linguee Bot is a web crawler used by Linguee to gather training data for its AI powered translation service."
|
||||
},
|
||||
"LinkupBot": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Search Crawlers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/linkupbot"
|
||||
},
|
||||
"meta-externalagent": {
|
||||
"operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)",
|
||||
"respect": "Yes",
|
||||
|
|
@ -622,6 +685,13 @@
|
|||
"operator": "[phind](https://www.phind.com/)",
|
||||
"respect": "Unclear at this time."
|
||||
},
|
||||
"Poggio-Citations": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Assistants",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/poggio-citations"
|
||||
},
|
||||
"Poseidon Research Crawler": {
|
||||
"operator": "[Poseidon Research](https://www.poseidonresearch.com)",
|
||||
"description": "Lab focused on scaling the interpretability research necessary to make better AI systems possible.",
|
||||
|
|
@ -651,11 +721,11 @@
|
|||
"respect": "Unclear at this time."
|
||||
},
|
||||
"SBIntuitionsBot": {
|
||||
"description": "AI development and information analysis",
|
||||
"operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)",
|
||||
"respect": "[Yes](https://www.sbintuitions.co.jp/en/bot/)",
|
||||
"frequency": "No information.",
|
||||
"function": "Uses data gathered in AI development and information analysis.",
|
||||
"operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)"
|
||||
"frequency": "No information.",
|
||||
"description": "AI development and information analysis"
|
||||
},
|
||||
"Scrapy": {
|
||||
"description": "\"AI and machine learning applications often need large amounts of quality data, and web data extraction is a fast, efficient way to build structured data sets.\"",
|
||||
|
|
@ -692,6 +762,13 @@
|
|||
"operator": "[Sidetrade](https://www.sidetrade.com)",
|
||||
"respect": "Unclear at this time."
|
||||
},
|
||||
"Spider": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/spider"
|
||||
},
|
||||
"TerraCotta": {
|
||||
"operator": "[Ceramic AI](https://ceramic.ai/)",
|
||||
"respect": "[Yes](https://github.com/CeramicTeam/CeramicTerracotta)",
|
||||
|
|
@ -721,11 +798,11 @@
|
|||
"description": "Makes data available for training AI models."
|
||||
},
|
||||
"VelenPublicWebCrawler": {
|
||||
"description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\"",
|
||||
"frequency": "No information.",
|
||||
"function": "Scrapes data for business data sets and machine learning models.",
|
||||
"operator": "[Velen Crawler](https://velen.io)",
|
||||
"respect": "[Yes](https://velen.io)"
|
||||
"respect": "[Yes](https://velen.io)",
|
||||
"function": "Scrapes data for business data sets and machine learning models.",
|
||||
"frequency": "No information.",
|
||||
"description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\""
|
||||
},
|
||||
"WARDBot": {
|
||||
"operator": "WEBSPARK",
|
||||
|
|
@ -741,6 +818,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "Webzio-Extended is a web crawler used by Webz.io to maintain a repository of web crawl data that it sells to other companies, including those using it to train AI models. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
|
||||
},
|
||||
"webzio-extended": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Data Scrapers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
|
||||
},
|
||||
"wpbot": {
|
||||
"operator": "[QuantumCloud](https://www.quantumcloud.com)",
|
||||
"respect": "Unclear at this time; opt out provided via [Google Form](https://forms.gle/ajBaxygz9jSR8p8G9)",
|
||||
|
|
@ -748,6 +832,13 @@
|
|||
"frequency": "Unclear at this time.",
|
||||
"description": "wpbot is a used to support the functionality of the AI Chatbot for WordPress plugin. It supports the use of customer models, data collection and customer support."
|
||||
},
|
||||
"WRTNBot": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "Undocumented AI Agents",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/wrtnbot"
|
||||
},
|
||||
"YaK": {
|
||||
"operator": "[Meltwater](https://www.meltwater.com/en/suite/consumer-intelligence)",
|
||||
"respect": "Unclear at this time.",
|
||||
|
|
@ -775,5 +866,12 @@
|
|||
"function": "Scrapes data for search engine and LLMs.",
|
||||
"frequency": "No information.",
|
||||
"description": "Retrieves data used for You.com web search engine and LLMs."
|
||||
},
|
||||
"ZanistaBot": {
|
||||
"operator": "Unclear at this time.",
|
||||
"respect": "Unclear at this time.",
|
||||
"function": "AI Search Crawlers",
|
||||
"frequency": "Unclear at this time.",
|
||||
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/zanistabot"
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue