Merge pull request #174 from karolyi/master

Update Brightbot operator and details; add meta-webindexer entry
This commit is contained in:
Glyn Normington 2025-09-09 03:45:23 +01:00 committed by GitHub
commit 4d506ca322
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -84,11 +84,11 @@
"description": "bigsur.ai is a web crawler operated by Big Sur AI that fetches website content to enable AI-powered web agents, sales assistants, and content marketing solutions for businesses. More info can be found at https://darkvisitors.com/agents/agents/bigsur-ai"
},
"Brightbot 1.0": {
"operator": "Browsing.ai",
"operator": "https://brightdata.com/brightbot",
"respect": "Unclear at this time.",
"function": "LLM/AI training.",
"frequency": "Unclear at this time.",
"description": "Scrapes data to train LLMs and AI products focused on website customer support."
"frequency": "At least one per minute.",
"description": "Scrapes data to train LLMs and AI products focused on website customer support, [uses residential IPs and legit-looking user-agents to disguise itself](https://ksol.io/en/blog/posts/brightbot-not-that-bright/)."
},
"Bytespider": {
"operator": "ByteDance",
@ -391,6 +391,13 @@
"frequency": "Unclear at this time.",
"description": "Meta-ExternalFetcher is dispatched by Meta AI products in response to user prompts, when they need to fetch an individual links. More info can be found at https://darkvisitors.com/agents/agents/meta-externalfetcher"
},
"meta-webindexer": {
"operator": "[Meta](https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/)",
"respect": "Unclear at this time.",
"function": "AI Assistants",
"frequency": "Unhinged, more than 1 per second.",
"description": "As per their documentation, \"The Meta-WebIndexer crawler navigates the web to improve Meta AI search result quality for users. In doing so, Meta analyzes online content to enhance the relevance and accuracy of Meta AI. Allowing Meta-WebIndexer in your robots.txt file helps us cite and link to your content in Meta AI's responses.\""
},
"Meta-ExternalFetcher": {
"operator": "Unclear at this time.",
"respect": "Unclear at this time.",
@ -664,4 +671,4 @@
"frequency": "No information.",
"description": "Retrieves data used for You.com web search engine and LLMs."
}
}
}