1
0
Fork 0
mirror of https://git.sr.ht/~seirdy/seirdy.one synced 2024-09-19 20:02:10 +00:00

Compare commits

...

2 commits

Author SHA1 Message Date
Rohan Kumar
9d6f7f2209
document the html-tidy dependency 2023-11-13 17:34:33 -08:00
Rohan Kumar
72aa3606c5
use html-tidy to format my pages 2023-11-13 17:33:18 -08:00
3 changed files with 10 additions and 4 deletions

View file

@ -24,6 +24,7 @@ Before deploying, I use some tools to process the output.
- `xmllint`, part of libxml2, to format the generated polyglot XHTML5 markup.
- [sd](https://github.com/chmln/sd) (for advanced multi-line regex operations, many of which exist to fix `xmllint`'s output)
- a patched version of html-tidy
I also apply static compression at max levels, using the following tools:

View file

@ -4,3 +4,5 @@ char-encoding: utf8
input-encoding: utf8
output-xhtml: yes
quiet: yes
indent: no
tidy-mark: no

View file

@ -24,11 +24,15 @@ cleanup() {
}
trap cleanup EXIT
# Run html-tidy as a pipeline filter: no input file is passed, so it is used
# on stdin/stdout (see the pipeline that pipes into run_tidy).
# Options come from linter-configs/tidy.conf, plus -asxhtml on the command line.
# tidy's stderr is discarded, and "|| true" makes the function return success
# even when tidy exits non-zero.
# NOTE(review): presumably this keeps tidy warnings from aborting the script;
# confirm that real tidy failures are acceptable to ignore here.
run_tidy () {
tidy -asxhtml -config linter-configs/tidy.conf 2>/dev/null || true
}
# delete the stylesheet from the html file; we'll re-insert it later.
# Also remove two indentation levels
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | sd '^\t(?:\t)?' '' >"$tmp_file"
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | sd '^\t(?:\t)?' '' | run_tidy >"$tmp_file"
{
head -n7 "$tmp_file" | sd -s '/>' ' />'
head -n7 "$tmp_file"
cat "$OUTPUT_DIR/tmp.css"
# shellcheck disable=SC2016 # these are regex statements, not shell expressions
tail -n +8 "$tmp_file" \
@ -38,6 +42,5 @@ sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | s
| sd -s ' ' ' ' \
| sd -f m 'class="u-photo photo"[^<]*<' 'class="u-photo photo"/> <' \
| sd '([a-z])<(data|time)' '$1 <$2' \
| sd '</span>(<a[^>]*rel="(?:nofollow ugc|ugc nofollow)"(?:[^>]*)?>liked</a>)' '</span> $1' \
| sd '([^ ])/>' '$1 />'
| sd '</span>(<a[^>]*rel="(?:nofollow ugc|ugc nofollow)"(?:[^>]*)?>liked</a>)' '</span> $1'
} >"$html_file"