# Mirror of https://git.sr.ht/~seirdy/seirdy.one
# Synced 2024-11-14 01:32:11 +00:00
# 62 lines, 3 KiB, YAML
# htmltest configuration.
# NOTE(review): option semantics are defined by htmltest itself; confirm any
# change against the README of the htmltest version in use.

DirectoryPath: "public"
IgnoreDirs:
  - "search"

CacheExpires: "120h"  # 5 days

CheckFavicon: true
EnforceHTML5: true
IgnoreAltMissing: false
# An empty alt makes the presentation role explicit; it's not a defect.
IgnoreAltEmpty: true

EnforceHTTPS: true
ExternalTimeout: 20
HTTPConcurrencyLimit: 128

# Plain-HTTP URLs; presumably exempt from the EnforceHTTPS check above
# (verify against the htmltest documentation).
IgnoreHTTPS:
  - "http://192.168.1"
  - "http://localhost:"
  - "http://wgq3bd2kqoybhstp77i3wrzbfnsyd27wt34psaja4grqiezqircorkyd.onion"
  - "http://bettermotherfuckingwebsite.com/"
  - "http://www.wall.org/~larry/"
  - "http://herpolhode.com/"
  - "http://io.netgarage.org/me/"
  - "http://linter.structured-data.org/"
  - "http://techrights.org/"
  - "http://www.tuxmachines.org/"
  - "http://xmlns.com/"
  - "http://nerdlistings.info/"

CheckExternal: true

# Entries such as "^gemini://" and "http://[^/]*\\.onion" indicate these are
# matched as patterns rather than exact URLs, so commented-out full URLs are
# kept next to the broader pattern that replaced them.
IgnoreURLs:
  - "../music.txt"
  - "^gemini://"
  - "mailto:"
  - "git@git"
  - "http://[^/]*\\.onion"
  - "https://archive.is"
  - "https://archive.ph"
  - "https://archive.today"
  - "https://baccyflap.com/noai"  # connection refused
  - "https://ogp.me/ns"
  - "https://xeiaso.net"  # HTTP 416
  - "https://collector.seirdy.one/webmentions/"
  - "http://creativecommons.org/ns"
  - "https://seirdy.one/search/"
  # - "https://seirdy.one/.well-known/webfinger?resource=acct%3Aseirdy%40seirdy.one"
  - "https://seirdy.one/.well-known/webfinger"  # inexplicable false positive
  - "https://strugee.net/"  # refuses connection
  # manual check: blocks crawlers
  # - "https://forum.palemoon.org/viewtopic.php?f=1&t=25473"
  - "https://forum.palemoon.org/viewtopic.php"
  - "https://queue.acm.org/detail"  # manual check: blocks crawlers
  # manual check: 400 for some reason, using curl works fine.
  # - "https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=830173"
  - "https://bugs.debian.org/cgi-bin/bugreport.cgi"
  # HTTP 413, browser works fine.
  - "https://www.science.org/content/blog-post/deliberately-optimizing-harm"
  - "https://web.archive.org/"  # the wayback machine itself.
  - "https://i.reddit.com/r/web_design/comments/k0dmpj/an_opinionated_list_of_best_practices_for_textual/gdmxy4u/"
  - "https://lnk.dk"  # blocks htmltest
  # blocks htmltest
  - "https://www.fastcompany.com/90759792/with-google-dominating-search-the-internet-needs-crawl-neutrality"
  # my DNS filters block this domain, cbf to work around it just for htmltest
  - "https://faq.whatsapp.com/general/security-and-privacy/answering-your-questions-about-whatsapps-privacy-policy"
  # Redirects to a different domain which tends to block requests. DOI is
  # generally good about keeping links alive; it's kinda the point of the
  # service.
  - "https://doi.org/10.1515/popets-2017-0023"
  # user has opted out of archiving
  - "https://herd.bovid.space/@garbados"
  - "https://make.wordpress.org/accessibility/handbook/markup/infinite-scroll/"
  - "https://www.reddit.com/user/Seirdy/"  # reddit blocks htmltest
  # some clients give a TLS failure but recent browsers seem to work.
  - "https://io.netgarage.org/me/"
  - "https://openai.com/blog/openai-codex/"  # false positive

OutputDir: "linter-configs/htmltest"