commit a97e7028bb72c71d0989fe7420e4480b94afb2ac parent 70d1f251c31a998bde80cf22f8c509a6918dbec4 Author: Eamon Caddigan <eamon.caddigan@gmail.com> Date: Mon, 4 Nov 2024 13:49:16 -0800 Disallow more AI bots This is adapted from <https://github.com/ai-robots-txt/ai.robots.txt/blob/9e06cf3/robots.txt>, except that I'm specifically allowing the bots to hit /about/, partly because I want them to say something correct about me, and partly because I want to catch them reading my restrictions on use. Diffstat:
M | static/robots.txt | | | 54 | +++++++++++++++++++++++++++++++++++++++++++++++++++--- |
1 file changed, 51 insertions(+), 3 deletions(-)
diff --git a/static/robots.txt b/static/robots.txt @@ -1,5 +1,53 @@ -User-agent: GPTBot -Disallow: / - User-agent: TurnitinBot Disallow: / + +User-agent: AI2Bot +User-agent: Ai2Bot-Dolma +User-agent: Amazonbot +User-agent: anthropic-ai +User-agent: Applebot +User-agent: Applebot-Extended +User-agent: Bytespider +User-agent: CCBot +User-agent: ChatGPT-User +User-agent: Claude-Web +User-agent: ClaudeBot +User-agent: cohere-ai +User-agent: Diffbot +User-agent: DuckAssistBot +User-agent: FacebookBot +User-agent: facebookexternalhit +User-agent: FriendlyCrawler +User-agent: Google-Extended +User-agent: GoogleOther +User-agent: GoogleOther-Image +User-agent: GoogleOther-Video +User-agent: GPTBot +User-agent: iaskspider/2.0 +User-agent: ICC-Crawler +User-agent: ImagesiftBot +User-agent: img2dataset +User-agent: ISSCyberRiskCrawler +User-agent: Kangaroo Bot +User-agent: Meta-ExternalAgent +User-agent: Meta-ExternalFetcher +User-agent: OAI-SearchBot +User-agent: omgili +User-agent: omgilibot +User-agent: PerplexityBot +User-agent: PetalBot +User-agent: Scrapy +User-agent: Sidetrade indexer bot +User-agent: Timpibot +User-agent: VelenPublicWebCrawler +User-agent: Webzio-Extended +User-agent: YouBot +Disallow: /categories/ +Disallow: /css/ +Disallow: /december-adventure/ +Disallow: /js/ +Disallow: /now/ +Disallow: /posters/ +Disallow: /posts/ +Disallow: /pubs/ +Disallow: /tags/