mirror of
https://git.sr.ht/~seirdy/seirdy.one
synced 2024-11-23 12:52:10 +00:00
xhtmlize: better formatting, exit on err
- make xhtmlize exit early if any file fails to be xhtml-ized
- add whitespace before self-closing slash.
This commit is contained in:
parent
5c7f2f17ea
commit
114e2d88ee
2 changed files with 13 additions and 10 deletions
|
@@ -15,19 +15,20 @@
|
|||
|
||||
set -e -u
|
||||
|
||||
export html_file="$1"
|
||||
export tmp_file="$html_file.tmp"
|
||||
export xhtml_file=${html_file%*.html}.xhtml
|
||||
html_file="$1"
|
||||
tmp_file="$html_file.tmp"
|
||||
xhtml_file=${html_file%*.html}.xhtml
|
||||
|
||||
cleanup() {
|
||||
rm -f "$tmp_file"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
trap cleanup EXIT
|
||||
# delete the stylesheet from the html file; we'll re-insert it later.
|
||||
# Also remove one indentation level
|
||||
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | sd '^\t' '' >"$tmp_file"
|
||||
{
|
||||
head -n7 "$tmp_file"
|
||||
head -n7 "$tmp_file" | sd -s '/>' ' />'
|
||||
cat tmp.css
|
||||
# shellcheck disable=SC2016 # these are regex statements, not shell expressions
|
||||
tail -n +8 "$tmp_file" \
|
||||
|
@@ -35,12 +36,14 @@ sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | sd '^\t' '' >"
|
|||
| sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \
|
||||
| sd '</span>.span itemprop="familyName"' '</span> <span itemprop="familyName"' \
|
||||
| sd '(</picture>|src="[^"]*" ?/>)<span itemprop="name" class="p-name fn n">' '$1 <span itemprop="name" class="p-name fn n">' \
|
||||
| sd '([a-z])<(data|time)' '$1 <$2'
|
||||
| sd '([a-z])<(data|time)' '$1 <$2' \
|
||||
| sd -s '/>' ' />'
|
||||
} >>"$xhtml_file"
|
||||
|
||||
# replace the html file with the formatted xhtml5 file, excluding the xml declaration
|
||||
# replace the html file with the formatted xhtml5 file, excluding the
|
||||
# XML declaration.
|
||||
tail -n +2 "$xhtml_file" > "$html_file"
|
||||
|
||||
# remove the redundant charset declaration from the xhtml file. It's the
|
||||
# first thing in the <head>
|
||||
# first thing in the <head>.
|
||||
sed -i 5d "$xhtml_file" # busybox sed supports "-i"
|
||||
|
|
|
@@ -1,6 +1,7 @@
|
|||
#!/bin/sh
|
||||
# takes an arg for the output dir.
|
||||
# Runs xhtmlize-single-file.sh on every single html file in the output dir.
|
||||
# exits if xhtmlize-single-file fails.
|
||||
|
||||
set -e -u
|
||||
|
||||
|
@@ -14,6 +15,5 @@ cleanup() {
|
|||
trap cleanup EXIT
|
||||
|
||||
export XMLLINT_INDENT=' '
|
||||
find "$output_dir" -type f -name '*.html' \
|
||||
-exec sh "$script_dir/xhtmlize-single-file.sh" {} \;
|
||||
time -p find "$output_dir" -type f -name '*.html' | xargs -n1 sh "$script_dir/xhtmlize-single-file.sh"
|
||||
# done
|
||||
|
|
Loading…
Reference in a new issue