From c3b3f9a2697cbb5cbda0eaf745e255608539b920 Mon Sep 17 00:00:00 2001 From: Seirdy Date: Mon, 9 Dec 2024 23:14:06 -0500 Subject: [PATCH] Opt out of GenAI training on OpenWebSearch.EU --- static/robots.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/static/robots.txt b/static/robots.txt index 2e231d9..ab68206 100644 --- a/static/robots.txt +++ b/static/robots.txt @@ -141,9 +141,12 @@ Disallow: / # Googe used this to train the initial version of Bard (now called Gemini). # I allow CCBot since its index is also used for upstart/hobbyist search engines # like Alexandria and for genuinely useful academic work I personally like. -# I allow Owler for similar reasons: +# I allow Owler for similar reasons, but disallow its "GenAI" identifier: # # . +User-Agent: GenAI +Disallow: / + # Omgilibot/Omgili is similar to CCBot, except it sells the scrape results. # I'm not familiar enough with Omgili to make a call here. # In the long run, my embedded robots meta-tags and headers could cover gen-AI