---
# Settings for the site's HTML/link checker. The option names and OutputDir
# suggest the consumer is htmltest -- NOTE(review): confirm against the tool
# that actually loads this file.

# Directory that is scanned, and sub-directories that are skipped.
DirectoryPath: "public"
IgnoreDirs:
  - "search"

CacheExpires: "168h"  # seven days

# Document-level checks.
CheckFavicon: true
EnforceHTML5: true
IgnoreAltMissing: true  # an empty alt makes presentation-role explicit, it's not a defect.
EnforceHTTPS: true

# External-request tuning.
# NOTE(review): ExternalTimeout is presumably in seconds -- confirm in the tool docs.
ExternalTimeout: 180
HTTPConcurrencyLimit: 96

# Plain-http URL prefixes that are exempt from EnforceHTTPS.
IgnoreHTTPS:
  - "http://192.168.1"
  - "http://localhost:"
  - "http://wgq3bd2kqoybhstp77i3wrzbfnsyd27wt34psaja4grqiezqircorkyd.onion"
  - "http://bettermotherfuckingwebsite.com/"
  - "http://dtrace.org/"
  - "http://emacspeak.sourceforge.net/"
  - "http://herpolhode.com/"
  - "http://io.netgarage.org/me/"
  - "http://linter.structured-data.org/"
  - "http://optipng.sourceforge.net/"
  - "http://renaissancechambara.jp/"
  - "http://techrights.org/"
  - "http://www.nathanmyhrvold.com/"
  - "http://www.tuxmachines.org/"
  - "http://xmlns.com/"

CheckExternal: true

# URL patterns that are never checked. Entries such as "^gemini://" and
# "(previous|next)" indicate these are matched as regular expressions
# (NOTE(review): confirm), so an unescaped "." matches any character.
IgnoreURLs:
  - "../music.txt"
  - "^gemini://"
  - "mailto:"
  - "git@git"
  - "http://[^/]*.onion"
  - "https://archive.is"
  - "https://ogp.me/ns"
  - "https://seirdy.one/webmentions/"
  - "http://creativecommons.org/ns"
  - "https://seirdy.one/search/"
  - "https://fediring.net/(previous|next)"  # redir
  - "https://forum.palemoon.org/"  # manual check: blocks crawlers
  - "https://queue.acm.org/detail"  # manual check: blocks crawlers
  - "https://www.geocities.ws/jaup/jaup.htm"  # manual check: blocks crawlers
  - "https://plausible.io/blog/google-floc#"  # manual check: I block this domain
  - "https://twitter.com/"  # manual check: 404 for some reason, using curl works fine.
  - "https://bugs.debian.org/cgi-bin/bugreport.cgi"  # manual check: 400 for some reason, using curl works fine.
  - "https://forum.kuketz-blog.de/"  # manual check: blocks crawlers
  - "https://web.archive.org/web/0/http"  # the wayback machine.

# Where the checker writes its output (log/cache) files.
OutputDir: "linter-configs/htmltest"