mirror of
https://git.sr.ht/~seirdy/seirdy.one
synced 2024-11-23 12:52:10 +00:00
Refactor post-processing to use sed instead of sd
html-tidy takes care of some post-processing, rendering other substitutions obsolete. Remove the obsolete regex substitutions. Now that we did that, the remaining substitutions can be done with vanilla POSIX or POSIX-Extended regular expressions. Replace sd with sed, and group the substitutions together into one invocation instead of multiple invocations piped together. This change speeds up post-processing to be almost as fast as the initial build step.
This commit is contained in:
parent
4ca800f1c3
commit
649f827f66
3 changed files with 20 additions and 57 deletions
|
@ -6,3 +6,5 @@ output-xhtml: yes
|
|||
quiet: yes
|
||||
indent: no
|
||||
tidy-mark: no
|
||||
quote-nbsp: no
|
||||
sort-attributes: alpha
|
||||
|
|
|
@ -56,60 +56,20 @@ while getopts "hd" flags; do
|
|||
esac
|
||||
done
|
||||
|
||||
trim_trailing_comma() {
|
||||
sd ',$' ''
|
||||
}
|
||||
|
||||
values_to_csv() {
|
||||
tr '\n' ',' | trim_trailing_comma && echo
|
||||
}
|
||||
|
||||
# values for the GEORGE webring
|
||||
# Left bc I quit trying to make a good first-party iframe alternative
|
||||
# that conformed to my site design standards while also imparting the
|
||||
# message of GEORGE as intended.
|
||||
# george() {
|
||||
# printf 'GEORGE,'
|
||||
# {
|
||||
# curl -sSL --compressed 'https://george.gh0.pw/embed.cgi?seirdy' \
|
||||
# | htmlq -a href 'main p a'
|
||||
# echo "null"
|
||||
# } | values_to_csv
|
||||
# }
|
||||
#
|
||||
endless_orbit() {
|
||||
printf 'Endless Orbit,'
|
||||
{
|
||||
curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \
|
||||
| grep -C 1 https://seirdy.one/
|
||||
echo "'null',"
|
||||
} | sd https://seirdy.one/ https://linkyblog.neocities.org/webring.html \
|
||||
| sd "\n|'" '' | trim_trailing_comma
|
||||
echo
|
||||
}
|
||||
|
||||
netizens() {
|
||||
printf 'Netizens,'
|
||||
{
|
||||
curl -sSL --compressed https://netizensring.link/onionring-variables.js \
|
||||
| grep -C 1 https://seirdy.one/
|
||||
} | sd 'https://seirdy.one/,?' 'https://netizensring.link/' \
|
||||
| sd "\n|'|\r" '' | trim_trailing_comma
|
||||
echo ',null'
|
||||
}
|
||||
|
||||
print_csv_values() {
|
||||
# george
|
||||
endless_orbit
|
||||
# netizens
|
||||
curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \
|
||||
| grep -C 1 https://seirdy.one/ \
|
||||
| tr -d "'\n" | sed 's|https://seirdy.one/|https://linkyblog.neocities.org/webring.html|'
|
||||
echo 'null'
|
||||
}
|
||||
|
||||
if [ "$dry_run" = '1' ]; then
|
||||
print_csv_values
|
||||
endless_orbit
|
||||
elif [ -f "$webrings_dest" ]; then
|
||||
echo "webrings file already generated"
|
||||
else
|
||||
print_csv_values | cat "$webrings_src" - >"$webrings_dest"
|
||||
endless_orbit | cat "$webrings_src" - >"$webrings_dest"
|
||||
fi
|
||||
|
||||
# vi:ft=sh
|
||||
|
|
|
@ -10,13 +10,13 @@
|
|||
# use xmllint to do the formatting.
|
||||
# xmllint ruins inline CSS so delete the inline CSS and re-insert it.
|
||||
# xmllint also adds extra whitespace around <pre><code> which we remove
|
||||
# with "sd". I chose sd since it handles newlines well.
|
||||
# with sed.
|
||||
# It also decreases indents by one level
|
||||
|
||||
set -e -u
|
||||
|
||||
html_file="$1"
|
||||
tmp_file="$html_file.tmp"
|
||||
tmp_file="$(mktemp)"
|
||||
xhtml_file=${html_file%*.html}.xhtml
|
||||
|
||||
cleanup() {
|
||||
|
@ -30,17 +30,18 @@ run_tidy () {
|
|||
|
||||
# delete the stylesheet from the html file; we'll re-insert it later.
|
||||
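# Indentation used to be stripped here (up to two leading tabs, via sd); the sed-based pipeline no longer does that, presumably because tidy ("indent: no") now normalizes it.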
|
||||
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | sd '^\t(?:\t)?' '' | run_tidy >"$tmp_file"
|
||||
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | run_tidy >"$tmp_file"
|
||||
{
|
||||
head -n7 "$tmp_file"
|
||||
cat "$OUTPUT_DIR/tmp.css"
|
||||
# shellcheck disable=SC2016 # these are regex statements, not shell expressions
|
||||
tail -n +8 "$tmp_file" \
|
||||
| sd '<pre(?: tabindex="0")?>\n(?:\t|\s)*<(code|samp)( |>)' '<pre tabindex="0"><$1$2' \
|
||||
| sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \
|
||||
| sd '</span>(?: )?.span itemprop="familyName"' '</span> <span itemprop="familyName"' \
|
||||
| sd -s ' ' ' ' \
|
||||
| sd -f m 'class="u-photo photo"[^<]*<' 'class="u-photo photo"/> <' \
|
||||
| sd '([a-z])<(data|time)' '$1 <$2' \
|
||||
| sd '</span>(<a[^>]*rel="(?:nofollow ugc|ugc nofollow)"(?:[^>]*)?>liked</a>)' '</span> $1'
|
||||
# Emit everything after the first seven lines of the tidied file (the
# head call earlier in this group prints those), inserting a space
# between certain adjacent inline elements.
#
# -E comes first so that every expression is parsed as an extended regex
# no matter how the sed implementation treats an -E placed between -e
# options.
#
# Fixes relative to the previous form of this command:
#   * the ".svg" replacement keeps its literal dot;
#   * the familyName pattern includes the closing quote, so the
#     replacement no longer yields familyName"".
#
# NOTE(review): sed matches one line at a time, so the final <pre>
# expression (its pattern contains \n) cannot match as written; it is
# kept unchanged here. \n, \t and \s are also GNU extensions, not POSIX.
sed -E \
	-e '1,7d' \
	-e 's|\.svg" width="16" /><span|.svg" width="16" /> <span|' \
	-e 's|</span>( )?.span itemprop="familyName"|</span> <span itemprop="familyName"|' \
	-e 's|([a-z])<data|\1 <data|' \
	-e 's#</span>(<a[^>]*rel="(nofollow ugc|ugc nofollow)"([^>]*)?>liked</a>)#</span> \1#' \
	-e 's#<pre( tabindex="0")?>\n(\t|\s)*<(code|samp)( |>)#<pre tabindex="0"><\3\4#' \
	"$tmp_file"
|
||||
} >"$html_file"
|
||||
|
|
Loading…
Reference in a new issue