mirror of
https://git.sr.ht/~seirdy/seirdy.one
synced 2024-11-27 14:12:09 +00:00
Refactor post-processing to use sed instead of sd
html-tidy takes care of some of the post-processing, which makes several of the regex substitutions obsolete; remove those obsolete substitutions. With them gone, the remaining substitutions can be expressed as vanilla POSIX or POSIX-Extended regular expressions. Replace sd with sed, and group the substitutions into a single invocation instead of multiple invocations piped together. This change speeds up post-processing so that it is almost as fast as the initial build step.
This commit is contained in:
parent
4ca800f1c3
commit
649f827f66
3 changed files with 20 additions and 57 deletions
|
@ -6,3 +6,5 @@ output-xhtml: yes
|
||||||
quiet: yes
|
quiet: yes
|
||||||
indent: no
|
indent: no
|
||||||
tidy-mark: no
|
tidy-mark: no
|
||||||
|
quote-nbsp: no
|
||||||
|
sort-attributes: alpha
|
||||||
|
|
|
@ -56,60 +56,20 @@ while getopts "hd" flags; do
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
trim_trailing_comma() {
|
|
||||||
sd ',$' ''
|
|
||||||
}
|
|
||||||
|
|
||||||
values_to_csv() {
|
|
||||||
tr '\n' ',' | trim_trailing_comma && echo
|
|
||||||
}
|
|
||||||
|
|
||||||
# values for the GEORGE webring
|
|
||||||
# Left bc I quit trying to make a good first-party iframe alternative
|
|
||||||
# that conformed to my site design standards while also imparting the
|
|
||||||
# message of GEORGE as intended.
|
|
||||||
# george() {
|
|
||||||
# printf 'GEORGE,'
|
|
||||||
# {
|
|
||||||
# curl -sSL --compressed 'https://george.gh0.pw/embed.cgi?seirdy' \
|
|
||||||
# | htmlq -a href 'main p a'
|
|
||||||
# echo "null"
|
|
||||||
# } | values_to_csv
|
|
||||||
# }
|
|
||||||
#
|
|
||||||
endless_orbit() {
|
endless_orbit() {
|
||||||
printf 'Endless Orbit,'
|
printf 'Endless Orbit,'
|
||||||
{
|
|
||||||
curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \
|
curl -sSL --compressed https://linkyblog.neocities.org/onionring/onionring-variables.js \
|
||||||
| grep -C 1 https://seirdy.one/
|
| grep -C 1 https://seirdy.one/ \
|
||||||
echo "'null',"
|
| tr -d "'\n" | sed 's|https://seirdy.one/|https://linkyblog.neocities.org/webring.html|'
|
||||||
} | sd https://seirdy.one/ https://linkyblog.neocities.org/webring.html \
|
echo 'null'
|
||||||
| sd "\n|'" '' | trim_trailing_comma
|
|
||||||
echo
|
|
||||||
}
|
|
||||||
|
|
||||||
netizens() {
|
|
||||||
printf 'Netizens,'
|
|
||||||
{
|
|
||||||
curl -sSL --compressed https://netizensring.link/onionring-variables.js \
|
|
||||||
| grep -C 1 https://seirdy.one/
|
|
||||||
} | sd 'https://seirdy.one/,?' 'https://netizensring.link/' \
|
|
||||||
| sd "\n|'|\r" '' | trim_trailing_comma
|
|
||||||
echo ',null'
|
|
||||||
}
|
|
||||||
|
|
||||||
print_csv_values() {
|
|
||||||
# george
|
|
||||||
endless_orbit
|
|
||||||
# netizens
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if [ "$dry_run" = '1' ]; then
|
if [ "$dry_run" = '1' ]; then
|
||||||
print_csv_values
|
endless_orbit
|
||||||
elif [ -f "$webrings_dest" ]; then
|
elif [ -f "$webrings_dest" ]; then
|
||||||
echo "webrings file already generated"
|
echo "webrings file already generated"
|
||||||
else
|
else
|
||||||
print_csv_values | cat "$webrings_src" - >"$webrings_dest"
|
endless_orbit | cat "$webrings_src" - >"$webrings_dest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# vi:ft=sh
|
# vi:ft=sh
|
||||||
|
|
|
@ -10,13 +10,13 @@
|
||||||
# use xmllint to do the formatting.
|
# use xmllint to do the formatting.
|
||||||
# xmllint ruins inline CSS so delete the inline CSS and re-insert it.
|
# xmllint ruins inline CSS so delete the inline CSS and re-insert it.
|
||||||
# xmllint also adds extra whitespace around <pre><code> which we remove
|
# xmllint also adds extra whitespace around <pre><code> which we remove
|
||||||
# with "sd". I chose sd since it handles newlines well.
|
# with sed.
|
||||||
# It also decreases indents by one level
|
# It also decreases indents by one level
|
||||||
|
|
||||||
set -e -u
|
set -e -u
|
||||||
|
|
||||||
html_file="$1"
|
html_file="$1"
|
||||||
tmp_file="$html_file.tmp"
|
tmp_file="$(mktemp)"
|
||||||
xhtml_file=${html_file%*.html}.xhtml
|
xhtml_file=${html_file%*.html}.xhtml
|
||||||
|
|
||||||
cleanup() {
|
cleanup() {
|
||||||
|
@ -30,17 +30,18 @@ run_tidy () {
|
||||||
|
|
||||||
# delete the stylesheet from the html file; we'll re-insert it later.
|
# delete the stylesheet from the html file; we'll re-insert it later.
|
||||||
# Also remove two indentation levels
|
# Also remove two indentation levels
|
||||||
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | sd '^\t(?:\t)?' '' | run_tidy >"$tmp_file"
|
sed 7d "$html_file" | xmllint --format --encode UTF-8 --noent - | tail -n +2 | run_tidy >"$tmp_file"
|
||||||
{
|
{
|
||||||
head -n7 "$tmp_file"
|
head -n7 "$tmp_file"
|
||||||
cat "$OUTPUT_DIR/tmp.css"
|
cat "$OUTPUT_DIR/tmp.css"
|
||||||
# shellcheck disable=SC2016 # these are regex statements, not shell expressions
|
# shellcheck disable=SC2016 # these are regex statements, not shell expressions
|
||||||
tail -n +8 "$tmp_file" \
|
sed \
|
||||||
| sd '<pre(?: tabindex="0")?>\n(?:\t|\s)*<(code|samp)( |>)' '<pre tabindex="0"><$1$2' \
|
-e '1,7d' \
|
||||||
| sd '(?:\n)?</(code|samp)>\n(?:[\t\s]*)?</pre>' '</$1></pre>' \
|
-e 's|\.svg" width="16" /><span|svg" width="16" /> <span|' \
|
||||||
| sd '</span>(?: )?.span itemprop="familyName"' '</span> <span itemprop="familyName"' \
|
-e 's|</span>( )?.span itemprop="familyName|</span> <span itemprop="familyName"|' \
|
||||||
| sd -s ' ' ' ' \
|
-E \
|
||||||
| sd -f m 'class="u-photo photo"[^<]*<' 'class="u-photo photo"/> <' \
|
-e 's|([a-z])<data|\1 <data|' \
|
||||||
| sd '([a-z])<(data|time)' '$1 <$2' \
|
-e 's#</span>(<a[^>]*rel="(nofollow ugc|ugc nofollow)"([^>]*)?>liked</a>)#</span> \1#' \
|
||||||
| sd '</span>(<a[^>]*rel="(?:nofollow ugc|ugc nofollow)"(?:[^>]*)?>liked</a>)' '</span> $1'
|
-e 's#<pre( tabindex="0")?>\n(\t|\s)*<(code|samp)( |>)#<pre tabindex="0"><\3\4#' \
|
||||||
|
"$tmp_file"
|
||||||
} >"$html_file"
|
} >"$html_file"
|
||||||
|
|
Loading…
Reference in a new issue