1
0
Fork 0
mirror of https://git.sr.ht/~seirdy/seirdy.one synced 2024-11-23 21:02:09 +00:00

Update docs in robots.txt

This commit is contained in:
Rohan Kumar 2024-03-13 01:14:49 -04:00
parent dc4dcb24a7
commit 0e89f7f052
No known key found for this signature in database
GPG key ID: 1E892DB2A5F84479

View file

@ -49,18 +49,28 @@ Disallow: /
User-agent: GPTBot User-agent: GPTBot
Disallow: / Disallow: /
# Official way to opt-out of Google's generative AI training: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers # Official way to opt-out of Google's generative AI training:
# <https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers>
User-agent: Google-Extended User-agent: Google-Extended
Disallow: / Disallow: /
# There isn't any public documentation for this AFAICT, but Reuters thinks this works so I might as well give it a shot. # There isn't any public documentation for this AFAICT.
# Reuters thinks this works so I might as well give it a shot.
User-agent: anthropic-ai User-agent: anthropic-ai
Disallow: / Disallow: /
User-agent: Claude-Web User-agent: Claude-Web
Disallow: / Disallow: /
# I'm not blocking CCBot for now, since it's also used for upstart/hobbyist search engines like Alexandria and for genuinely useful academic work I personally like. I'm hoping my embedded robots meta-tags and headers will cover gen-AI opt-outs instead. # I'm not blocking CCBot for now. It publishes a free index for anyone to use.
# Omgilibot/Omgili is similar to CCBot, except it sells the scrape results. I'm not familiar enough to make a call here. # Google used this to train the initial version of Bard (now called Gemini).
# I allow CCBot since its index is also used for upstart/hobbyist search engines
# like Alexandria and for genuinely useful academic work I personally like.
# I allow Owler for similar reasons:
# <https://openwebsearch.eu/owler/#owler-opt-out>
# <https://openwebsearch.eu/common-goals-with-common-crawl/>.
# Omgilibot/Omgili is similar to CCBot, except it sells the scrape results.
# I'm not familiar enough with Omgili to make a call here.
# In the long run, my embedded robots meta-tags and headers should cover gen-AI
# opt-outs.
Sitemap: https://seirdy.one/sitemap.xml Sitemap: https://seirdy.one/sitemap.xml