mirror of
https://git.sr.ht/~seirdy/seirdy.one
synced 2024-12-25 18:22:09 +00:00
62fbac8fd3
I don't use archive links to posts deleted by users who would rather not have their posts archived. Exclude this one from my broken link checker.
61 lines
2.7 KiB
YAML
61 lines
2.7 KiB
YAML
# htmltest configuration for the generated site.
# Option semantics noted below follow the htmltest README; confirm against the
# htmltest version actually in use.

# Directory containing the built HTML to check.
DirectoryPath: "public"
# Directories skipped when scanning for HTML files.
IgnoreDirs:
  - "search"
# How long cached external-link results stay valid.
CacheExpires: "48h"  # 2 days
CheckFavicon: true
EnforceHTML5: true
# Missing alt attributes are reported...
IgnoreAltMissing: false
# ...but alt="" is accepted.
IgnoreAltEmpty: true  # an empty alt makes presentation-role explicit, it's not a defect.
EnforceHTTPS: true
# Timeout for external requests (seconds, per the htmltest docs).
ExternalTimeout: 180
HTTPConcurrencyLimit: 96
# Plain-http URL prefixes exempted from EnforceHTTPS
# (presumably hosts that do not serve working HTTPS -- verify before pruning).
IgnoreHTTPS:
  - "http://192.168.1"
  - "http://localhost:"
  - "http://wgq3bd2kqoybhstp77i3wrzbfnsyd27wt34psaja4grqiezqircorkyd.onion"
  - "http://bettermotherfuckingwebsite.com/"
  - "http://dtrace.org/"
  - "http://emacspeak.sourceforge.net/"
  - "http://herpolhode.com/"
  - "http://io.netgarage.org/me/"
  - "http://linter.structured-data.org/"
  - "http://optipng.sourceforge.net/"
  - "http://renaissancechambara.jp/"
  - "http://techrights.org/"
  - "http://www.nathanmyhrvold.com/"
  - "http://www.tuxmachines.org/"
  - "http://xmlns.com/"
  - "http://nerdlistings.info/"
CheckExternal: true
# URLs excluded from link checking. Entries are regular expressions (see
# "^gemini://" and "(previous|next)" below), so regex metacharacters such as
# "?" cannot be written literally; that is presumably why the exact URLs with
# query strings are left commented out next to a shorter path-only pattern.
IgnoreURLs:
  - "../music.txt"
  - "^gemini://"
  - "mailto:"
  - "git@git"
  - "http://[^/]*\\.onion"
  - "https://archive.is"
  - "https://archive.ph"
  - "https://archive.today"
  - "https://ogp.me/ns"
  - "https://seirdy.one/webmentions/"
  - "http://creativecommons.org/ns"
  - "https://seirdy.one/search/"
  - "https://fediring.net/(previous|next)"  # redir
  # - "https://forum.palemoon.org/viewtopic.php?f=1&t=25473" # manual check: blocks crawlers
  - "https://forum.palemoon.org/viewtopic.php"
  - "https://queue.acm.org/detail"  # manual check: blocks crawlers
  - "https://plausible.io/blog/google-floc#"  # manual check: I block this domain
  # - "https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=830173" # manual check: 400 for some reason, using curl works fine.
  - "https://bugs.debian.org/cgi-bin/bugreport.cgi"
  # - "https://forum.kuketz-blog.de/viewtopic.php?p=78202" # manual check: blocks crawlers
  - "https://forum.kuketz-blog.de/viewtopic.php"
  - "https://web.archive.org/"  # the wayback machine itself.
  # NOTE(review): the "." before "id=" presumably stands in for a literal "?" -- confirm.
  - "https://gitweb.torproject.org/tor-browser.git/tree/browser/components/securitylevel/SecurityLevel.jsm.id=ffdf"  # Seems to block htmltest; check manually
  - "https://lnk.dk"  # blocks htmltest
  - "https://www.fastcompany.com/90759792/with-google-dominating-search-the-internet-needs-crawl-neutrality"  # blocks htmltest
  - "https://faq.whatsapp.com/general/security-and-privacy/answering-your-questions-about-whatsapps-privacy-policy"  # my DNS filters block this domain, cbf to work around it just for htmltest
  - "https://doi.org/10.1515/popets-2017-0023"  # redirects to a different domain which tends to block requests. DOI is generally good about keeping links alive; it's kinda the point of the service.
  - "https://docs.graycot.dev/s/MFowZsw_F"  # DNSSEC issue
  # user has opted out of archiving
  - "https://herd.bovid.space/@garbados"
# Where htmltest keeps its cache and log output.
OutputDir: "linter-configs/htmltest"