Update from Dark Visitors

This commit is contained in:
dark-visitors 2025-12-02 01:25:24 +00:00
commit 8363d4fdd4

View file

@ -13,12 +13,19 @@
"frequency": "No information provided.", "frequency": "No information provided.",
"description": "Explores 'certain domains' to find web content." "description": "Explores 'certain domains' to find web content."
}, },
"AI2Bot-DeepResearchEval": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Assistants",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/ai2bot-deepresearcheval"
},
"Ai2Bot-Dolma": { "Ai2Bot-Dolma": {
"description": "Explores 'certain domains' to find web content.",
"frequency": "No information provided.",
"function": "Content is used to train open language models.",
"operator": "[Ai2](https://allenai.org/crawler)", "operator": "[Ai2](https://allenai.org/crawler)",
"respect": "Yes" "respect": "Yes",
"function": "Content is used to train open language models.",
"frequency": "No information provided.",
"description": "Explores 'certain domains' to find web content."
}, },
"aiHitBot": { "aiHitBot": {
"operator": "[aiHit](https://www.aihitdata.com/about)", "operator": "[aiHit](https://www.aihitdata.com/about)",
@ -27,20 +34,6 @@
"frequency": "No information provided.", "frequency": "No information provided.",
"description": "Scrapes data for AI systems." "description": "Scrapes data for AI systems."
}, },
"AmazonBuyForMe": {
"operator": "[Amazon](https://amazon.com)",
"respect": "Unclear at this time.",
"function": "AI Agents",
"frequency": "No information provided.",
"description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
},
"atlassian-bot": {
"operator": "[Atlassian](https://www.atlassian.com)",
"respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
"function": "AI search, assistants and agents",
"frequency": "No information provided.",
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
},
"amazon-kendra": { "amazon-kendra": {
"operator": "Amazon", "operator": "Amazon",
"respect": "Yes", "respect": "Yes",
@ -55,6 +48,13 @@
"frequency": "No information provided.", "frequency": "No information provided.",
"description": "Includes references to crawled website when surfacing answers via Alexa; does not clearly outline other uses." "description": "Includes references to crawled website when surfacing answers via Alexa; does not clearly outline other uses."
}, },
"AmazonBuyForMe": {
"operator": "[Amazon](https://amazon.com)",
"respect": "Unclear at this time.",
"function": "AI Agents",
"frequency": "No information provided.",
"description": "Buy For Me is an AI agent that helps buy products at the direction of customers."
},
"Andibot": { "Andibot": {
"operator": "[Andi](https://andisearch.com/)", "operator": "[Andi](https://andisearch.com/)",
"respect": "Unclear at this time", "respect": "Unclear at this time",
@ -90,6 +90,13 @@
"frequency": "Unclear at this time.", "frequency": "Unclear at this time.",
"description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools." "description": "Apple has a secondary user agent, Applebot-Extended ... [that is] used to train Apple's foundation models powering generative AI features across Apple products, including Apple Intelligence, Services, and Developer Tools."
}, },
"atlassian-bot": {
"operator": "[Atlassian](https://www.atlassian.com)",
"respect": "[Yes](https://support.atlassian.com/organization-administration/docs/connect-custom-website-to-rovo/#Editing-your-robots.txt)",
"function": "AI search, assistants and agents",
"frequency": "No information provided.",
"description": "atlassian-bot is a web crawler used to index website content for its AI search, assistants and agents available in its Rovo GenAI product."
},
"Awario": { "Awario": {
"operator": "Awario", "operator": "Awario",
"respect": "Unclear at this time.", "respect": "Unclear at this time.",
@ -146,6 +153,13 @@
"frequency": "Monthly at present.", "frequency": "Monthly at present.",
"description": "Web archive going back to 2008. [Cited in thousands of research papers per year](https://commoncrawl.org/research-papers)." "description": "Web archive going back to 2008. [Cited in thousands of research papers per year](https://commoncrawl.org/research-papers)."
}, },
"ChatGLM-Spider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/chatglm-spider"
},
"ChatGPT Agent": { "ChatGPT Agent": {
"operator": "[OpenAI](https://openai.com)", "operator": "[OpenAI](https://openai.com)",
"respect": "Yes", "respect": "Yes",
@ -384,6 +398,20 @@
"frequency": "No information.", "frequency": "No information.",
"description": "Data is used to train current and future models, removed paywalled data, PII and data that violates the company's policies." "description": "Data is used to train current and future models, removed paywalled data, PII and data that violates the company's policies."
}, },
"iAskBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/iaskbot"
},
"iaskspider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/iaskspider"
},
"iaskspider/2.0": { "iaskspider/2.0": {
"description": "Used to provide answers to user queries.", "description": "Used to provide answers to user queries.",
"frequency": "Unclear at this time.", "frequency": "Unclear at this time.",
@ -412,6 +440,13 @@
"operator": "[ImageSift](https://imagesift.com)", "operator": "[ImageSift](https://imagesift.com)",
"respect": "[Yes](https://imagesift.com/about)" "respect": "[Yes](https://imagesift.com/about)"
}, },
"imageSpider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/imagespider"
},
"img2dataset": { "img2dataset": {
"description": "Downloads large sets of images into datasets for LLM training or other purposes.", "description": "Downloads large sets of images into datasets for LLM training or other purposes.",
"frequency": "At the discretion of img2dataset users.", "frequency": "At the discretion of img2dataset users.",
@ -440,6 +475,20 @@
"frequency": "Indexes based on 'change signals' and user configuration.", "frequency": "Indexes based on 'change signals' and user configuration.",
"description": "Indexes content to tailor AI experiences, generate content, answers and recommendations." "description": "Indexes content to tailor AI experiences, generate content, answers and recommendations."
}, },
"KunatoCrawler": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/kunatocrawler"
},
"laion-huggingface-processor": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/laion-huggingface-processor"
},
"LAIONDownloader": { "LAIONDownloader": {
"operator": "[Large-scale Artificial Intelligence Open Network](https://laion.ai/)", "operator": "[Large-scale Artificial Intelligence Open Network](https://laion.ai/)",
"respect": "[No](https://laion.ai/faq/)", "respect": "[No](https://laion.ai/faq/)",
@ -447,6 +496,13 @@
"frequency": "Unclear at this time.", "frequency": "Unclear at this time.",
"description": "LAIONDownloader is a bot by LAION, a non-profit organization that provides datasets, tools and models to liberate machine learning research." "description": "LAIONDownloader is a bot by LAION, a non-profit organization that provides datasets, tools and models to liberate machine learning research."
}, },
"LCC": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/lcc"
},
"LinerBot": { "LinerBot": {
"operator": "Unclear at this time.", "operator": "Unclear at this time.",
"respect": "Unclear at this time.", "respect": "Unclear at this time.",
@ -461,6 +517,13 @@
"frequency": "Unclear at this time.", "frequency": "Unclear at this time.",
"description": "Linguee Bot is a web crawler used by Linguee to gather training data for its AI powered translation service." "description": "Linguee Bot is a web crawler used by Linguee to gather training data for its AI powered translation service."
}, },
"LinkupBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Search Crawlers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/linkupbot"
},
"meta-externalagent": { "meta-externalagent": {
"operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)", "operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers)",
"respect": "Yes", "respect": "Yes",
@ -622,6 +685,13 @@
"operator": "[phind](https://www.phind.com/)", "operator": "[phind](https://www.phind.com/)",
"respect": "Unclear at this time." "respect": "Unclear at this time."
}, },
"Poggio-Citations": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Assistants",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/poggio-citations"
},
"Poseidon Research Crawler": { "Poseidon Research Crawler": {
"operator": "[Poseidon Research](https://www.poseidonresearch.com)", "operator": "[Poseidon Research](https://www.poseidonresearch.com)",
"description": "Lab focused on scaling the interpretability research necessary to make better AI systems possible.", "description": "Lab focused on scaling the interpretability research necessary to make better AI systems possible.",
@ -651,11 +721,11 @@
"respect": "Unclear at this time." "respect": "Unclear at this time."
}, },
"SBIntuitionsBot": { "SBIntuitionsBot": {
"description": "AI development and information analysis", "operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)",
"respect": "[Yes](https://www.sbintuitions.co.jp/en/bot/)", "respect": "[Yes](https://www.sbintuitions.co.jp/en/bot/)",
"frequency": "No information.",
"function": "Uses data gathered in AI development and information analysis.", "function": "Uses data gathered in AI development and information analysis.",
"operator": "[SB Intuitions](https://www.sbintuitions.co.jp/en/)" "frequency": "No information.",
"description": "AI development and information analysis"
}, },
"Scrapy": { "Scrapy": {
"description": "\"AI and machine learning applications often need large amounts of quality data, and web data extraction is a fast, efficient way to build structured data sets.\"", "description": "\"AI and machine learning applications often need large amounts of quality data, and web data extraction is a fast, efficient way to build structured data sets.\"",
@ -692,6 +762,13 @@
"operator": "[Sidetrade](https://www.sidetrade.com)", "operator": "[Sidetrade](https://www.sidetrade.com)",
"respect": "Unclear at this time." "respect": "Unclear at this time."
}, },
"Spider": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/spider"
},
"TerraCotta": { "TerraCotta": {
"operator": "[Ceramic AI](https://ceramic.ai/)", "operator": "[Ceramic AI](https://ceramic.ai/)",
"respect": "[Yes](https://github.com/CeramicTeam/CeramicTerracotta)", "respect": "[Yes](https://github.com/CeramicTeam/CeramicTerracotta)",
@ -721,11 +798,11 @@
"description": "Makes data available for training AI models." "description": "Makes data available for training AI models."
}, },
"VelenPublicWebCrawler": { "VelenPublicWebCrawler": {
"description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\"",
"frequency": "No information.",
"function": "Scrapes data for business data sets and machine learning models.",
"operator": "[Velen Crawler](https://velen.io)", "operator": "[Velen Crawler](https://velen.io)",
"respect": "[Yes](https://velen.io)" "respect": "[Yes](https://velen.io)",
"function": "Scrapes data for business data sets and machine learning models.",
"frequency": "No information.",
"description": "\"Our goal with this crawler is to build business datasets and machine learning models to better understand the web.\""
}, },
"WARDBot": { "WARDBot": {
"operator": "WEBSPARK", "operator": "WEBSPARK",
@ -741,6 +818,13 @@
"frequency": "Unclear at this time.", "frequency": "Unclear at this time.",
"description": "Webzio-Extended is a web crawler used by Webz.io to maintain a repository of web crawl data that it sells to other companies, including those using it to train AI models. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended" "description": "Webzio-Extended is a web crawler used by Webz.io to maintain a repository of web crawl data that it sells to other companies, including those using it to train AI models. More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
}, },
"webzio-extended": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Data Scrapers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/webzio-extended"
},
"wpbot": { "wpbot": {
"operator": "[QuantumCloud](https://www.quantumcloud.com)", "operator": "[QuantumCloud](https://www.quantumcloud.com)",
"respect": "Unclear at this time; opt out provided via [Google Form](https://forms.gle/ajBaxygz9jSR8p8G9)", "respect": "Unclear at this time; opt out provided via [Google Form](https://forms.gle/ajBaxygz9jSR8p8G9)",
@ -748,6 +832,13 @@
"frequency": "Unclear at this time.", "frequency": "Unclear at this time.",
"description": "wpbot is a used to support the functionality of the AI Chatbot for WordPress plugin. It supports the use of customer models, data collection and customer support." "description": "wpbot is a used to support the functionality of the AI Chatbot for WordPress plugin. It supports the use of customer models, data collection and customer support."
}, },
"WRTNBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "Undocumented AI Agents",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/wrtnbot"
},
"YaK": { "YaK": {
"operator": "[Meltwater](https://www.meltwater.com/en/suite/consumer-intelligence)", "operator": "[Meltwater](https://www.meltwater.com/en/suite/consumer-intelligence)",
"respect": "Unclear at this time.", "respect": "Unclear at this time.",
@ -775,5 +866,12 @@
"function": "Scrapes data for search engine and LLMs.", "function": "Scrapes data for search engine and LLMs.",
"frequency": "No information.", "frequency": "No information.",
"description": "Retrieves data used for You.com web search engine and LLMs." "description": "Retrieves data used for You.com web search engine and LLMs."
},
"ZanistaBot": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
"function": "AI Search Crawlers",
"frequency": "Unclear at this time.",
"description": "Description unavailable from darkvisitors.com More info can be found at https://darkvisitors.com/agents/agents/zanistabot"
} }
} }