1
0
Fork 0
mirror of https://git.sr.ht/~seirdy/seirdy.one synced 2024-11-10 00:12:09 +00:00

Refactor post-processing to use sed instead of sd

html-tidy takes care of some post-processing, rendering other
substitutions obsolete. Remove the obsolete regex substitutions.

Now that we did that, the remaining substitutions can be done with
vanilla POSIX or POSIX-Extended regular expressions. Replace sd with
sed, and group the substitutions together into one invocation instead of
multiple invocations piped together. This change speeds up
post-processing to be almost as fast as the initial build step.
This commit is contained in:
Rohan Kumar 2023-11-14 21:39:53 -08:00
parent 4ca800f1c3
commit 649f827f66
No known key found for this signature in database
GPG key ID: 1E892DB2A5F84479
3 changed files with 20 additions and 57 deletions

View file

@ -6,3 +6,5 @@ output-xhtml: yes
quiet: yes
indent: no
tidy-mark: no
quote-nbsp: no
sort-attributes: alpha

View file

@ -56,60 +56,20 @@ while getopts "hd" flags; do
esac
done
trim_trailing_comma() {
sd ',$' ''
}
values_to_csv() {
tr '\n' ',' | trim_trailing_comma && echo
}
# values for the GEORGE webring
# Left bc I quit trying to make a good first-party iframe alternative
# that conformed to my site design standards while also imparting the
# message of GEORGE as intended.
# george() {
# printf 'GEORGE,'
# {
# curl -sSL --compressed 'https://george.gh0.pw/embed.cgi?seirdy' \
# | htmlq -a href 'main p a'
# echo "null"
# } | values_to_csv
# }
#
endless_orbit() {
printf 'Endless Orbit,'
{
curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \
| grep -C 1 https://seirdy.one/
echo "'null',"
} | sd https://seirdy.one/ https://linkyblog.neocities.org/webring.html \
| sd "\n|'" '' | trim_trailing_comma
echo
}
netizens() {
printf 'Netizens,'
{
curl -sSL --compressed https://netizensring.link/onionring-variables.js \
| grep -C 1 https://seirdy.one/
} | sd 'https://seirdy.one/,?' 'https://netizensring.link/' \
| sd "\n|'|\r" '' | trim_trailing_comma
echo ',null'
}
print_csv_values() {
# george
endless_orbit
# netizens
| grep -C 1 https://seirdy.one/ \
| tr -d "'\n" | sed 's|https://seirdy.one/|https://linkyblog.neocities.org/webring.html|'
echo 'null'
}
if [ "$dry_run" = '1' ]; then
print_csv_values
endless_orbit
elif [ -f "$webrings_dest" ]; then
echo "webrings file already generated"
else
print_csv_values | cat "$webrings_src" - >"$webrings_dest"
endless_orbit | cat "$webrings_src" - >"$webrings_dest"
fi
# vi:ft=sh

View file

@ -10,13 +10,13 @@
# use xmllint to do the formatting.
# xmllint ruins inline CSS so delete the inline CSS and re-insert it.
# xmllint also adds extra whitespace around <pre><code> which we remove
# with "sd". I chose sd since it handles newlines well.
# with sed.
# It also decreases indents by one level
set -e -u
html_file="$1"
tmp_file="$html_file.tmp"
tmp_file="$(mktemp)"
xhtml_file=${html_file%*.html}.xhtml
cleanup() {
@ -30,17 +30,18 @@ run_tidy () {
# delete the stylesheet from the html file; we'll re-insert it later.
# Also remove two indentation levels
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | sd '^\t(?:\t)?' '' | run_tidy >"$tmp_file"
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | run_tidy >"$tmp_file"
{
head -n7 "$tmp_file"
cat "$OUTPUT_DIR/tmp.css"
# shellcheck disable=SC2016 # these are regex statements, not shell expressions
tail -n +8 "$tmp_file" \
| sd '<pre(?: tabindex="0")?>\n(?:\t|\s)*<(code|samp)( |>)' '<pre tabindex="0"><$1$2' \
| sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \
| sd '</span>(?:&nbsp;)?.span itemprop="familyName"' '</span>&#160;<span itemprop="familyName"' \
| sd -s '&nbsp;' '&#160;' \
| sd -f m 'class="u-photo photo"[^<]*<' 'class="u-photo photo"/> <' \
| sd '([a-z])<(data|time)' '$1 <$2' \
| sd '</span>(<a[^>]*rel="(?:nofollow ugc|ugc nofollow)"(?:[^>]*)?>liked</a>)' '</span> $1'
sed \
-e '1,7d' \
-e 's|\.svg" width="16" /><span|svg" width="16" /> <span|' \
-e 's|</span>(&nbsp;)?.span itemprop="familyName|</span>&#160;<span itemprop="familyName"|' \
-E \
-e 's|([a-z])<data|\1 <data|' \
-e 's#</span>(<a[^>]*rel="(nofollow ugc|ugc nofollow)"([^>]*)?>liked</a>)#</span> \1#' \
-e 's#<pre( tabindex="0")?>\n(\t|\s)*<(code|samp)( |>)#<pre tabindex="0"><\3\4#' \
"$tmp_file"
} >"$html_file"