1
0
Fork 0
mirror of https://git.sr.ht/~seirdy/seirdy.one synced 2024-11-14 01:32:11 +00:00

xhtmlize: better formatting, exit on err

- make xhtmlize exit early if any file fails to be xhtmlized.
- add whitespace before the self-closing slash.
This commit is contained in:
Rohan Kumar 2022-06-13 08:27:09 -07:00
parent 5c7f2f17ea
commit 114e2d88ee
No known key found for this signature in database
GPG key ID: 1E892DB2A5F84479
2 changed files with 13 additions and 10 deletions

View file

@ -15,19 +15,20 @@
set -e -u set -e -u
export html_file="$1" html_file="$1"
export tmp_file="$html_file.tmp" tmp_file="$html_file.tmp"
export xhtml_file=${html_file%*.html}.xhtml xhtml_file=${html_file%*.html}.xhtml
cleanup() { cleanup() {
rm -f "$tmp_file" rm -f "$tmp_file"
} }
trap cleanup EXIT trap cleanup EXIT
trap cleanup EXIT # delete the stylesheet from the html file; we'll re-insert it later.
# Also remove one indentation level
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | sd '^\t' '' >"$tmp_file" sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | sd '^\t' '' >"$tmp_file"
{ {
head -n7 "$tmp_file" head -n7 "$tmp_file" | sd -s '/>' ' />'
cat tmp.css cat tmp.css
# shellcheck disable=SC2016 # these are regex statements, not shell expressions # shellcheck disable=SC2016 # these are regex statements, not shell expressions
tail -n +8 "$tmp_file" \ tail -n +8 "$tmp_file" \
@ -35,12 +36,14 @@ sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | sd '^\t' '' >"
| sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \ | sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \
| sd '</span>.span itemprop="familyName"' '</span> <span itemprop="familyName"' \ | sd '</span>.span itemprop="familyName"' '</span> <span itemprop="familyName"' \
| sd '(</picture>|src="[^"]*" ?/>)<span itemprop="name" class="p-name fn n">' '$1 <span itemprop="name" class="p-name fn n">' \ | sd '(</picture>|src="[^"]*" ?/>)<span itemprop="name" class="p-name fn n">' '$1 <span itemprop="name" class="p-name fn n">' \
| sd '([a-z])<(data|time)' '$1 <$2' | sd '([a-z])<(data|time)' '$1 <$2' \
| sd -s '/>' ' />'
} >>"$xhtml_file" } >>"$xhtml_file"
# replace the html file with the formatted xhtml5 file, excluding the xml declaration # replace the html file with the formatted xhtml5 file, excluding the
# XML declaration.
tail -n +2 "$xhtml_file" > "$html_file" tail -n +2 "$xhtml_file" > "$html_file"
# remove the redundant charset declaration from the xhtml file. It's the # remove the redundant charset declaration from the xhtml file. It's the
# first thing in the <head> # first thing in the <head>.
sed -i 5d "$xhtml_file" # busybox sed supports "-i" sed -i 5d "$xhtml_file" # busybox sed supports "-i"

View file

@ -1,6 +1,7 @@
#!/bin/sh #!/bin/sh
# takes an arg for the output dir. # takes an arg for the output dir.
# Runs xhtmlize-single-file.sh on every single html file in the output dir. # Runs xhtmlize-single-file.sh on every single html file in the output dir.
# exits if xhtmlize-single-file fails.
set -e -u set -e -u
@ -14,6 +15,5 @@ cleanup() {
trap cleanup EXIT trap cleanup EXIT
export XMLLINT_INDENT=' ' export XMLLINT_INDENT=' '
find "$output_dir" -type f -name '*.html' \ time -p find "$output_dir" -type f -name '*.html' | xargs -n1 sh "$script_dir/xhtmlize-single-file.sh"
-exec sh "$script_dir/xhtmlize-single-file.sh" {} \;
# done # done