From 100a6f3d11140953bf5d1843890cac1d9f6efd2c Mon Sep 17 00:00:00 2001
From: Seirdy
Date: Thu, 26 Sep 2024 10:47:07 -0400
Subject: [PATCH] block another LLM scraper

---
 static/robots.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/static/robots.txt b/static/robots.txt
index 5b1d5aa..94c6a97 100644
--- a/static/robots.txt
+++ b/static/robots.txt
@@ -124,6 +124,11 @@ Disallow: /
 # This one doesn't support robots.txt: https://www.allenai.org/crawler
 # block it with your reverse-proxy or WAF or something.
 
+# See
+# Parent page says it builds LLMs in the infographic:
+User-agent: Cotoyogi
+Disallow: /
+
 # I'm not blocking CCBot for now. It publishes a free index for anyone to use.
 # Googe used this to train the initial version of Bard (now called Gemini).
 # I allow CCBot since its index is also used for upstart/hobbyist search engines