From 4910a7c2c114b03c4d8d541f72eb2729d8d088f5 Mon Sep 17 00:00:00 2001 From: Rohan Kumar Date: Wed, 15 Nov 2023 09:21:26 -0800 Subject: [PATCH] Quit using HTML-Tidy See https://github.com/htacg/tidy-html5/issues/1094. The most recent commit without that regression can't handle `dl` elements with `div` children. --- scripts/xhtmlize-single-file.sh | 9 ++++----- scripts/xhtmlize.sh | 4 +++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/xhtmlize-single-file.sh b/scripts/xhtmlize-single-file.sh index 98e6706..fa91ad4 100644 --- a/scripts/xhtmlize-single-file.sh +++ b/scripts/xhtmlize-single-file.sh @@ -29,7 +29,7 @@ run_tidy () { # delete the stylesheet from the html file; we'll re-insert it later. # Also remove two indentation levels -sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | run_tidy >"$tmp_file" +sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 >"$tmp_file" { head -n7 "$tmp_file" cat "${OUTPUT_DIR:?}/tmp.css" @@ -37,12 +37,11 @@ sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | r #shellcheck source=/home/rkumar/Executables/ghq/git.sr.ht/~seirdy/seirdy.one/scripts/xhtmlize.sh sed \ -e '1,7d' \ - -e "s|name=\"generator\" />|name=\"generator\" />\n${TIDY:?}|" \ - -e 's|\.svg" width="16" />  ( )?.span itemprop="familyName| (]*rel="(nofollow ugc|ugc nofollow)"([^>]*)?>liked)# \1#' \ - -e 's#\n(\t|\s)*<(code|samp)( |>)#
<\3\4#' \
-			"$tmp_file"
+			"$tmp_file" \
+		| awk '/^<\/code>/{printf "%s",$0;next}7'
 } >"$html_file"
diff --git a/scripts/xhtmlize.sh b/scripts/xhtmlize.sh
index 6d9762d..4d23fd3 100644
--- a/scripts/xhtmlize.sh
+++ b/scripts/xhtmlize.sh
@@ -11,7 +11,9 @@ script_dir="$(dirname "$0")"
 tidy_version="$(tidy -version)"
 export TIDY=""
 
-sed -e '7q;d' "$output_dir/index.html" | tr -d '\t' >"$output_dir/tmp.css"
+{
+	printf '\t' && sed -e '7q;d' "$output_dir/index.html"
+} >"$output_dir/tmp.css"
 cleanup() {
 	rm -f "$output_dir/tmp.css"
 }