1
0
Fork 0
mirror of https://git.sr.ht/~seirdy/seirdy.one synced 2024-11-10 00:12:09 +00:00

Refactor post-processing to use sed instead of sd

html-tidy takes care of some post-processing, rendering other
substitutions obsolete. Remove the obsolete regex substitutions.

With those substitutions removed, the remaining ones can be expressed
with vanilla POSIX or POSIX-Extended regular expressions. Replace sd with
sed, and group the substitutions together into one invocation instead of
multiple invocations piped together. This change makes post-processing
almost as fast as the initial build step.
This commit is contained in:
Rohan Kumar 2023-11-14 21:39:53 -08:00
parent 4ca800f1c3
commit 649f827f66
No known key found for this signature in database
GPG key ID: 1E892DB2A5F84479
3 changed files with 20 additions and 57 deletions

View file

@ -6,3 +6,5 @@ output-xhtml: yes
quiet: yes quiet: yes
indent: no indent: no
tidy-mark: no tidy-mark: no
quote-nbsp: no
sort-attributes: alpha

View file

@ -56,60 +56,20 @@ while getopts "hd" flags; do
esac esac
done done
trim_trailing_comma() {
sd ',$' ''
}
values_to_csv() {
tr '\n' ',' | trim_trailing_comma && echo
}
# values for the GEORGE webring
# Left because I quit trying to make a good first-party iframe alternative
# that conformed to my site design standards while also imparting the
# message of GEORGE as intended.
# george() {
# printf 'GEORGE,'
# {
# curl -sSL --compressed 'https://george.gh0.pw/embed.cgi?seirdy' \
# | htmlq -a href 'main p a'
# echo "null"
# } | values_to_csv
# }
#
endless_orbit() { endless_orbit() {
printf 'Endless Orbit,' printf 'Endless Orbit,'
{
curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \ curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \
| grep -C 1 https://seirdy.one/ | grep -C 1 https://seirdy.one/ \
echo "'null'," | tr -d "'\n" | sed 's|https://seirdy.one/|https://linkyblog.neocities.org/webring.html|'
} | sd https://seirdy.one/ https://linkyblog.neocities.org/webring.html \ echo 'null'
| sd "\n|'" '' | trim_trailing_comma
echo
}
netizens() {
printf 'Netizens,'
{
curl -sSL --compressed https://netizensring.link/onionring-variables.js \
| grep -C 1 https://seirdy.one/
} | sd 'https://seirdy.one/,?' 'https://netizensring.link/' \
| sd "\n|'|\r" '' | trim_trailing_comma
echo ',null'
}
print_csv_values() {
# george
endless_orbit
# netizens
} }
if [ "$dry_run" = '1' ]; then if [ "$dry_run" = '1' ]; then
print_csv_values endless_orbit
elif [ -f "$webrings_dest" ]; then elif [ -f "$webrings_dest" ]; then
echo "webrings file already generated" echo "webrings file already generated"
else else
print_csv_values | cat "$webrings_src" - >"$webrings_dest" endless_orbit | cat "$webrings_src" - >"$webrings_dest"
fi fi
# vi:ft=sh # vi:ft=sh

View file

@ -10,13 +10,13 @@
# use xmllint to do the formatting. # use xmllint to do the formatting.
# xmllint ruins inline CSS so delete the inline CSS and re-insert it. # xmllint ruins inline CSS so delete the inline CSS and re-insert it.
# xmllint also adds extra whitespace around <pre><code> which we remove # xmllint also adds extra whitespace around <pre><code> which we remove
# with "sd". I chose sd since it handles newlines well. # with sed.
# It also decreases indents by one level # It also decreases indents by one level
set -e -u set -e -u
html_file="$1" html_file="$1"
tmp_file="$html_file.tmp" tmp_file="$(mktemp)"
xhtml_file=${html_file%*.html}.xhtml xhtml_file=${html_file%*.html}.xhtml
cleanup() { cleanup() {
@ -30,17 +30,18 @@ run_tidy () {
# delete the stylesheet from the html file; we'll re-insert it later. # delete the stylesheet from the html file; we'll re-insert it later.
# Also remove two indentation levels # Also remove two indentation levels
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | sd '^\t(?:\t)?' '' | run_tidy >"$tmp_file" sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | run_tidy >"$tmp_file"
{ {
head -n7 "$tmp_file" head -n7 "$tmp_file"
cat "$OUTPUT_DIR/tmp.css" cat "$OUTPUT_DIR/tmp.css"
# shellcheck disable=SC2016 # these are regex statements, not shell expressions # shellcheck disable=SC2016 # these are regex statements, not shell expressions
tail -n +8 "$tmp_file" \ sed \
| sd '<pre(?: tabindex="0")?>\n(?:\t|\s)*<(code|samp)( |>)' '<pre tabindex="0"><$1$2' \ -e '1,7d' \
| sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \ -e 's|\.svg" width="16" /><span|svg" width="16" /> <span|' \
| sd '</span>(?:&nbsp;)?.span itemprop="familyName"' '</span>&#160;<span itemprop="familyName"' \ -e 's|</span>(&nbsp;)?.span itemprop="familyName|</span>&#160;<span itemprop="familyName"|' \
| sd -s '&nbsp;' '&#160;' \ -E \
| sd -f m 'class="u-photo photo"[^<]*<' 'class="u-photo photo"/> <' \ -e 's|([a-z])<data|\1 <data|' \
| sd '([a-z])<(data|time)' '$1 <$2' \ -e 's#</span>(<a[^>]*rel="(nofollow ugc|ugc nofollow)"([^>]*)?>liked</a>)#</span> \1#' \
| sd '</span>(<a[^>]*rel="(?:nofollow ugc|ugc nofollow)"(?:[^>]*)?>liked</a>)' '</span> $1' -e 's#<pre( tabindex="0")?>\n(\t|\s)*<(code|samp)( |>)#<pre tabindex="0"><\3\4#' \
"$tmp_file"
} >"$html_file" } >"$html_file"