# Configuration for htmltest, run against the generated site.

# Directory to scan, and sub-directories to skip.
DirectoryPath: "public"
IgnoreDirs:
  - "search"

CacheExpires: "120h"  # 120 hours = 5 days
CheckFavicon: true
EnforceHTML5: true
IgnoreAltMissing: false
# An empty alt makes presentation-role explicit; it's not a defect.
IgnoreAltEmpty: true
EnforceHTTPS: true
ExternalTimeout: 10
HTTPConcurrencyLimit: 128

# Plain-HTTP URLs that are allowed despite EnforceHTTPS.
# NOTE(review): presumably matched as prefixes/patterns against link targets;
# confirm against the htmltest build in use.
IgnoreHTTPS:
  - "http://192.168.1"
  - "http://localhost:"
  - "http://wgq3bd2kqoybhstp77i3wrzbfnsyd27wt34psaja4grqiezqircorkyd.onion"
  - "http://bettermotherfuckingwebsite.com/"
  - "http://dtrace.org/"
  - "http://www.wall.org/~larry/"
  - "http://herpolhode.com/"
  - "http://io.netgarage.org/me/"
  - "http://linter.structured-data.org/"
  - "http://optipng.sourceforge.net/"
  - "http://renaissancechambara.jp/"
  - "http://techrights.org/"
  - "http://www.nathanmyhrvold.com/"
  - "http://www.tuxmachines.org/"
  - "http://xmlns.com/"
  - "http://nerdlistings.info/"

CheckExternal: true

# URLs that are not checked at all. Entries are patterns (some use regex
# syntax such as "^" and "[^/]*"), so several entries below are the
# query-string-free form of a link; the full original URL is kept in a
# commented-out line directly above the entry for reference.
IgnoreURLs:
  - "../music.txt"
  - "^gemini://"
  - "mailto:"
  - "git@git"
  - "http://[^/]*\\.onion"
  - "https://archive.is"
  - "https://archive.ph"
  - "https://archive.today"
  - "https://baccyflap.com/noai"  # connection refused
  - "https://ogp.me/ns"
  - "https://xeiaso.net"  # HTTP 416
  - "https://collector.seirdy.one/webmentions/"
  - "http://creativecommons.org/ns"
  - "https://seirdy.one/search/"
  # - "https://seirdy.one/.well-known/webfinger?resource=acct%3Aseirdy%40seirdy.one"
  - "https://seirdy.one/.well-known/webfinger"  # inexplicable false positive
  - "https://strugee.net/"  # refuses connection
  - "https://www.moonshot.forbiddenl0ve.net/index.php"  # cert mismatch false positive
  # - "https://forum.palemoon.org/viewtopic.php?f=1&t=25473" # manual check: blocks crawlers
  - "https://forum.palemoon.org/viewtopic.php"
  - "https://queue.acm.org/detail"  # manual check: blocks crawlers
  - "https://plausible.io/blog/google-floc#"  # manual check: I block this domain
  # - "https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=830173" # manual check: 400 for some reason, using curl works fine.
  - "https://bugs.debian.org/cgi-bin/bugreport.cgi"
  - "https://www.science.org/content/blog-post/deliberately-optimizing-harm"  # HTTP 413, browser works fine.
  # - "https://forum.kuketz-blog.de/viewtopic.php?p=78202" # manual check: blocks crawlers
  - "https://forum.kuketz-blog.de/viewtopic.php"
  - "https://web.archive.org/"  # the Wayback Machine itself.
  - "https://i.reddit.com/r/web_design/comments/k0dmpj/an_opinionated_list_of_best_practices_for_textual/gdmxy4u/"
  # Seems to block htmltest; check manually.
  - "https://gitweb.torproject.org/tor-browser.git/tree/browser/components/securitylevel/SecurityLevel.jsm.id=ffdf"
  - "https://lnk.dk"  # blocks htmltest
  - "https://www.fastcompany.com/90759792/with-google-dominating-search-the-internet-needs-crawl-neutrality"  # blocks htmltest
  # My DNS filters block this domain; cbf to work around it just for htmltest.
  - "https://faq.whatsapp.com/general/security-and-privacy/answering-your-questions-about-whatsapps-privacy-policy"
  # Redirects to a different domain which tends to block requests. DOI is
  # generally good about keeping links alive; it's kinda the point of the
  # service.
  - "https://doi.org/10.1515/popets-2017-0023"
  - "https://docs.graycot.dev/s/MFowZsw_F"  # DNSSEC issue
  # User has opted out of archiving.
  - "https://herd.bovid.space/@garbados"

OutputDir: "linter-configs/htmltest"