From 1cd7f2c106807f4011f00d74a2cbeb24db2af0a2 Mon Sep 17 00:00:00 2001
From: Rohan Kumar <seirdy@seirdy.one>
Date: Tue, 12 Mar 2024 23:53:58 -0400
Subject: [PATCH] add some AI scrapers to robots.txt

---
 static/robots.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/static/robots.txt b/static/robots.txt
index 88d56b6..8568aaa 100644
--- a/static/robots.txt
+++ b/static/robots.txt
@@ -53,4 +53,14 @@ Disallow: /
 User-agent: Google-Extended
 Disallow: /
 
+# There isn't any public documentation for the Anthropic user-agent tokens below AFAICT, but Reuters thinks they work, so I might as well give them a shot.
+User-agent: anthropic-ai
+Disallow: /
+
+User-agent: Claude-Web
+Disallow: /
+
+# I'm not blocking CCBot for now, since it's also used for upstart/hobbyist search engines like Alexandria and for genuinely useful academic work I personally like. I'm hoping my embedded robots meta-tags and headers will cover gen-AI opt-outs instead.
+# Omgilibot/Omgili is similar to CCBot, except that its operator sells the scrape results. I'm not familiar enough with it to decide whether to block it.
+
 Sitemap: https://seirdy.one/sitemap.xml