2022-06-18 03:36:35 +00:00
# Directory htmltest is pointed at; "public" is presumably the built site output (confirm).
DirectoryPath: "public"
# Directories left out of the scan (per the option name; confirm against htmltest docs).
IgnoreDirs:
- "search"
2023-05-27 04:12:16 +00:00
# Lifetime of cached check results (presumably external-link results; confirm against htmltest docs).
CacheExpires: "120h" # 5 days
2022-06-18 03:36:35 +00:00
# Page-level checks: presumably require a favicon and an HTML5 doctype (confirm against htmltest docs).
CheckFavicon: true
EnforceHTML5: true
2022-11-20 19:27:24 +00:00
# Alt-text policy: a missing alt attribute is still reported, an empty one is accepted.
IgnoreAltMissing: false
IgnoreAltEmpty: true # an empty alt explicitly marks an image as presentational; it's not a defect.
2022-06-18 03:36:35 +00:00
# Flag plain-http links; exemptions are listed under IgnoreHTTPS.
EnforceHTTPS: true
2023-05-27 04:12:16 +00:00
# Timeout for external requests (unit is presumably seconds; confirm against htmltest docs).
ExternalTimeout: 10
2022-11-23 03:15:48 +00:00
# Upper bound on simultaneous HTTP requests (per the option name; confirm against htmltest docs).
HTTPConcurrencyLimit: 128
2022-06-18 03:36:35 +00:00
# Plain-http URLs exempted from EnforceHTTPS: local addresses, an onion
# service, and third-party sites that presumably lack working HTTPS (confirm
# before removing an entry).
IgnoreHTTPS:
- "http://192.168.1"
- "http://localhost:"
- "http://wgq3bd2kqoybhstp77i3wrzbfnsyd27wt34psaja4grqiezqircorkyd.onion"
- "http://bettermotherfuckingwebsite.com/"
- "http://dtrace.org/"
2024-04-24 20:09:35 +00:00
- "http://www.wall.org/~larry/"
2022-06-18 03:36:35 +00:00
- "http://herpolhode.com/"
- "http://io.netgarage.org/me/"
- "http://linter.structured-data.org/"
- "http://optipng.sourceforge.net/"
- "http://renaissancechambara.jp/"
- "http://techrights.org/"
- "http://www.nathanmyhrvold.com/"
- "http://www.tuxmachines.org/"
- "http://xmlns.com/"
2022-10-20 17:12:41 +00:00
- "http://nerdlistings.info/"
2022-06-18 03:36:35 +00:00
# Also check links that point off-site; exemptions are listed under IgnoreURLs.
CheckExternal: true
# URLs skipped by the link checker. Entries appear to be regular expressions
# (note the `^` anchor, the `[^/]*` class and the `(previous|next)` alternation
# below), which is presumably why URLs with a `?` query string are listed
# without it and the full URL is kept as a commented-out line above.
IgnoreURLs:
- "../music.txt"
2022-06-19 20:44:51 +00:00
- "^gemini://"
- "mailto:"
- "git@git"
2022-11-18 04:16:57 +00:00
- "http://[^/]*\\.onion"
2022-06-19 20:44:51 +00:00
- "https://archive.is"
2022-11-07 18:47:43 +00:00
- "https://archive.ph"
- "https://archive.today"
2022-06-19 20:44:51 +00:00
- "https://ogp.me/ns"
2023-12-08 07:53:25 +00:00
- "https://collector.seirdy.one/webmentions/"
2022-06-19 20:44:51 +00:00
- "http://creativecommons.org/ns"
- "https://seirdy.one/search/"
2023-05-27 04:12:16 +00:00
# - "https://seirdy.one/.well-known/webfinger?resource=acct%3Aseirdy%40seirdy.one"
- "https://seirdy.one/.well-known/webfinger" # inexplicable false positive
2022-06-18 03:36:35 +00:00
- "https://fediring.net/(previous|next)" # redir
2022-08-12 04:30:20 +00:00
# - "https://forum.palemoon.org/viewtopic.php?f=1&t=25473" # manual check: blocks crawlers
- "https://forum.palemoon.org/viewtopic.php"
2022-06-19 20:44:51 +00:00
- "https://queue.acm.org/detail" # manual check: blocks crawlers
2022-06-18 03:36:35 +00:00
- "https://plausible.io/blog/google-floc#" # manual check: I block this domain
2022-08-12 04:30:20 +00:00
# - "https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=830173" # manual check: 400 for some reason, using curl works fine.
- "https://bugs.debian.org/cgi-bin/bugreport.cgi"
# - "https://forum.kuketz-blog.de/viewtopic.php?p=78202" # manual check: blocks crawlers
- "https://forum.kuketz-blog.de/viewtopic.php"
2022-08-25 03:16:20 +00:00
- "https://web.archive.org/" # the wayback machine itself.
2023-05-27 04:12:16 +00:00
- "https://i.reddit.com/r/web_design/comments/k0dmpj/an_opinionated_list_of_best_practices_for_textual/gdmxy4u/"
2022-10-11 04:35:28 +00:00
- "https://gitweb.torproject.org/tor-browser.git/tree/browser/components/securitylevel/SecurityLevel.jsm.id=ffdf" # Seems to block htmltest; check manually
- "https://lnk.dk" # blocks htmltest
2022-10-17 19:44:51 +00:00
- "https://www.fastcompany.com/90759792/with-google-dominating-search-the-internet-needs-crawl-neutrality" # blocks htmltest
2022-11-07 18:47:43 +00:00
- "https://faq.whatsapp.com/general/security-and-privacy/answering-your-questions-about-whatsapps-privacy-policy" # my DNS filters block this domain, cbf to work around it just for htmltest
- "https://doi.org/10.1515/popets-2017-0023" # redirects to a different domain which tends to block requests. DOI is generally good about keeping links alive; it's kinda the point of the service.
- "https://docs.graycot.dev/s/MFowZsw_F" # DNSSEC issue
2022-11-20 19:27:24 +00:00
# user has opted out of archiving
- "https://herd.bovid.space/@garbados"
2022-06-18 03:36:35 +00:00
# Directory htmltest writes its output to (presumably cache and log files; confirm against htmltest docs).
OutputDir: "linter-configs/htmltest"