
Refactor shell scripts.

- Avoid relative paths, so I can run them from any working directory.
- Make a dedicated curl-wrapping shell script instead of re-defining the
  same alias everywhere.
- Support extended offline periods: allow get-webmentions.sh to fall
  back to the cached copy of my webmentions for up to a day, and don't
  accidentally destroy it; keep changes atomic (see the sketch after
  this list).
- Verify that the fetched webmentions are legit before replacing the
  cached ones.
- Make shellcheck happy about quoting in vnu.sh by passing the list of
  files with xargs instead of a shell variable.
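The atomicity and verification bullets boil down to one pattern: download into a temporary file beside the cache, sanity-check it, and only then rename it over the cached copy, so a failed or truncated download can never clobber a known-good cache. A minimal sketch of that pattern; the URL, path, and validity check below are placeholders, not the ones get-webmentions.sh actually uses:

    #!/bin/sh
    set -eu
    cache='data/example.json'   # hypothetical cache path
    curl -sS 'https://example.com/feed.json' -o "$cache.tmp"
    # Replace the cache only after the download passes a sanity check;
    # mv within one filesystem is atomic, so the old copy survives any failure.
    if grep -q '"items"' "$cache.tmp"; then
        mv "$cache.tmp" "$cache"
    else
        echo 'downloaded file failed validation; keeping old cache' >&2
        exit 1
    fi
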
Rohan Kumar 2023-11-23 18:45:03 -08:00
parent a3e4729b6c
commit 2b016aa973
GPG key ID: 1E892DB2A5F84479
5 changed files with 66 additions and 27 deletions


@@ -1,7 +1,7 @@
 #!/bin/sh
 # Runs axe on every page of my sitemap
 # first arg is output directory, after that comes urls from a sitemap. We replace the urls with equivalent local file paths.
-PATH="scripts/bin:$PATH"
+PATH="$(dirname "$0"):$PATH"
 output_dir="$1"
 shift
 urls="$(echo "$*" | tr ' ' '\n' | sd '^https?://[^/]*' "file://$PWD/$output_dir" | sd '/$' '/index.html' | tr '\n' ' ')"


@@ -1,14 +1,14 @@
 #!/bin/sh
 set -e -u
-alias curl_cmd='curl --proto "=https" --proto-default https --http2 -siSL --tlsv1.3 --cert-status'
 ipv6_success=1
 ipv4_success=1
-curl_cmd -6 'seirdy.one/ip' || ipv6_success=0
+curl_wrapper="$(dirname "$0")/curl-wrapper.sh"
+"$curl_wrapper" -6 'https://seirdy.one/ip' || ipv6_success=0
 echo
-curl_cmd -4 'seirdy.one/ip' || ipv4_success=0
+"$curl_wrapper" -4 'https://seirdy.one/ip' || ipv4_success=0
 echo
 if [ "$ipv6_success" = 0 ] && [ "$ipv4_success" = 0 ]; then

scripts/curl-wrapper.sh Executable file

@@ -0,0 +1,3 @@
+#!/bin/sh
+# a curl wrapper for seirdy.one
+curl --proto "=https" --tlsv1.3 --cert-status -sS -m10 "$@"
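In the wrapper, --proto "=https" rejects any non-HTTPS URL, --tlsv1.3 requires TLS 1.3, --cert-status checks the certificate's revocation status, -sS silences progress output while keeping errors, -m10 caps each request at ten seconds, and "$@" forwards whatever arguments the caller supplies. A rough usage sketch from a sibling script; the POST endpoint is a placeholder:

    curl_wrapper="$(dirname "$0")/curl-wrapper.sh"
    "$curl_wrapper" 'https://seirdy.one/ip'
    "$curl_wrapper" -sX POST 'https://example.com/endpoint' -d 'key=value'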


@@ -2,18 +2,48 @@
 #
 # Script to fetch all approved webmentions from webmentiond as a big json response.
 # Uses POSIX and cURL in CI, also uses any pass/pash-compatible pwmngr otherwise
-# The response is cached for 90 minutes.
+# The response is cached for 90 minutes. If fetching fresh webmentions fails, fall back to the cached version for up to one day (1440 minutes). If cached webmentions are older than a day, error out.
 set -e -u
+dirname="$(dirname "$0")"
+curl_wrapper="$dirname/curl-wrapper.sh"
 auth_url='https://seirdy.one/webmentions/authenticate/access-key'
 webmentions_url='https://seirdy.one/webmentions/manage/mentions?limit=9999&status=approved'
-webmentions_file="$(realpath data/webmentions.json)"
+webmentions_file="$dirname/../data/webmentions.json"
-# just a little curl wrapper I use on seirdy.one
-alias ccurl='curl --proto "=https" --proto-default https --tlsv1.3 --cert-status'
+stale_after_minutes=90
+fallback_stale_after_minutes=1440
-# use a long-lived key (password) to fetch a short-lived bearer token.
+skip_check=0
+check_cached_webmentions() {
+	if [ "$skip_check" = 0 ]; then
+		expires_in="$fallback_stale_after_minutes"
+		if [ $# -gt 0 ]; then
+			expires_in="$1"
+		else
+			echo "Failed to fetch webmentions. Using fallback cache expiry."
+		fi
+		exit_status=1
+		if [ -f "$webmentions_file" ]; then
+			old_webmention_file="$(find "$webmentions_file" -mmin +"$expires_in")"
+			if [ "$old_webmention_file" = "" ]; then
+				echo 'Using cached webmentions'
+				skip_check=1
+				exit_status=0
+			fi
+		fi
+		if [ "$exit_status" = 1 ]; then
+			echo "webmentions are outdated. failed to fetch for over a day."
+		fi
+		exit "$exit_status"
+	fi
+}
+trap check_cached_webmentions EXIT
+# Grab my long-lived key (password). We will use this to authenticate.
 key() {
 	set +u
 	if [ -n "$BUILD_SUBMITTER" ]; then
@@ -24,23 +54,32 @@ key() {
 	set -u
 }
+# Fetch a short-lived access token from my webmention receiver.
 token() {
 	key_response="$(key)"
-	ccurl -sX POST "$auth_url" -d "key=$key_response"
+	"$curl_wrapper" -sX POST "$auth_url" -d "key=$key_response"
 }
-# use that token to fetch all webmentions
+# Verify that the webmentions file has downloaded successfully by ensuring that it starts and ends with a given substring.
+# The total number of webmentions should be at least a 3-digit number.
+verify_webmentions() {
+	grep -E '^\{"items":\[\{"id":".*,"total":[0-9]{3}([0-9]*)?\}$' "$webmentions_file.tmp" >/dev/null
+}
+# use the token to fetch all webmentions.
 fetch_webmentions() {
+	echo 'Fetching webmentions'
 	token_response="$(token)"
-	ccurl --compressed -H "Authorization: Bearer $token_response" "$webmentions_url" -o "$webmentions_file"
+	"$curl_wrapper" --compressed -H "Authorization: Bearer $token_response" "$webmentions_url" -o "$webmentions_file.tmp" || return 1
+	if verify_webmentions; then
+		mv "$webmentions_file.tmp" "$webmentions_file"
+	else
+		echo 'Error: webmentions failed to verify'
+		exit 1
+	fi
 }
 # fetch webmentions if we don't have a fresh copy already.
-if [ -f "$webmentions_file" ] \
-	&& [ "$(find "$webmentions_file" -mmin +90)" = "" ]; then
-	echo 'Using cached webmentions'
-else
-	echo 'Fetching webmentions'
+if ! check_cached_webmentions "$stale_after_minutes"; then
 	fetch_webmentions
 fi
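The fallback logic hinges on find -mmin: find "$file" -mmin +N prints the path only when the file is more than N minutes old, so empty output means the cache is still fresh. Because check_cached_webmentions is also registered as an EXIT trap, any failure while fetching falls back to a cache up to 1440 minutes old before the script gives up. A condensed sketch of the same flow; fetch-somehow stands in for the real token-authenticated download:

    #!/bin/sh
    cache='data/example.json'   # hypothetical cache path
    fresh_minutes=90
    fallback_minutes=1440
    # Empty find output means the file exists and is newer than the limit.
    is_fresh() { [ -f "$cache" ] && [ -z "$(find "$cache" -mmin +"$1")" ]; }
    if is_fresh "$fresh_minutes"; then
        echo 'cache is fresh; nothing to fetch'
    elif fetch-somehow -o "$cache.tmp" && mv "$cache.tmp" "$cache"; then
        echo 'refreshed cache'
    elif is_fresh "$fallback_minutes"; then
        echo 'fetch failed; using day-old cache'
    else
        echo 'fetch failed and the cache is older than a day' >&2
        exit 1
    fi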


@@ -1,6 +1,6 @@
 #!/bin/sh
 set -e -u
-pwd="$(dirname "$0")"
+dirname="$(dirname "$0")"
 output_dir="$1"
 find_files_to_analyze() {
@@ -8,12 +8,9 @@ find_files_to_analyze() {
 	| grep -Ev '(bimi\.svg|search/index\.x?html)$'
 }
-files_to_analyze="$(find_files_to_analyze)"
+# files_to_analyze="$(find_files_to_analyze)"
 # we skip the BIMI icon (VNU can't handle SVG 1.2) and the search page (it has raw templates).
-vnu \
-	--stdout \
-	--format json \
-	--also-check-svg \
-	$files_to_analyze \
-	| sh "$pwd/filter-vnu.sh"
+find_files_to_analyze \
+	| xargs vnu --stdout --format json --also-check-svg \
+	| sh "$dirname/filter-vnu.sh"