uorss/uorss.sh
2022-02-27 10:32:02 +00:00

375 lines
12 KiB
Bash
Executable File

#!/usr/bin/env bash
# Abort on any use of an unset variable and turn off "!" history expansion.
set -u
set +H
# Version string shown in the start-up banner.
VERSION="1.0.0"
# Root directory of uorss, taken from the UORSS_DIR environment variable.
# The ":-" default keeps an unset UORSS_DIR from tripping `set -u`.
CONFIG_DIR="${UORSS_DIR:-}"
# Fallback to the working directory. This fallback will be removed; do not
# rely on it.
if [ -z "$CONFIG_DIR" ]; then
  echo "main: UORSS_DIR is not defined, assuming working directory. This is potentially unsafe"
  CONFIG_DIR="$PWD"
fi
# Directory holding the configuration fragments, and the cache root.
CONFIGD_DIR="${CONFIG_DIR}/config.d"
CACHE_DIR="${CONFIG_DIR}/cache"
echo "main: Running as $(whoami). Version ${VERSION}. Directory at ${CONFIG_DIR}"
# die: message...
# Writes the message (all arguments joined by the first character of IFS,
# a space by default) to stderr and terminates the shell with exit status 1.
die () {
  # printf rather than echo: a message that starts with -n, -e or -E must be
  # printed verbatim instead of being parsed as an echo option.
  printf '%s\n' "$*" 1>&2
  exit 1
}
# stripCtrlChars: (no arguments, filters standard input)
# Copies stdin to stdout, rewriting every ESC byte (0x1b) as the literal
# four-character text "\x1b". Only ESC is handled; other control characters
# pass through unchanged. The exit status is that of sed.
# NOTE(review): the \x1b escape in the pattern relies on the sed
# implementation supporting hex escapes (GNU sed does).
stripCtrlChars () {
  sed -e 's/\x1b/\\x1b/g' /dev/stdin
}
# Refuse to start unless both the uorss directory and its config.d
# subdirectory exist.
if [ ! -d "$CONFIG_DIR" ]; then
  die "main: No uorss directory defined! See readme."
fi
if [ ! -d "$CONFIGD_DIR" ]; then
  die "main: No config.d directory defined! See readme."
fi
str_startswith () {
  # str_startswith: prefix string
  # Returns 0 when string begins with prefix and 1 otherwise. An empty prefix
  # matches every string.
  local prefix="$1"
  local subject="$2"
  # The quoted prefix is compared literally (glob characters in it are not
  # special); the trailing * accepts whatever follows.
  [[ "$subject" == "$prefix"* ]]
}
str_endswith () {
  # str_endswith: affix string
  # Returns 0 when string ends with affix and 1 otherwise. An empty affix
  # matches every string.
  local affix="$1"
  local subject="$2"
  # The quoted affix is compared literally (glob characters in it are not
  # special); the leading * accepts whatever precedes it.
  [[ "$subject" == *"$affix" ]]
}
# Merge all files in config.d/
createConfig () {
  # createConfig:
  # Walks the entries directly inside $CONFIGD_DIR in glob order. Directories,
  # temporary files (name starts or ends with "~"), hidden files and files
  # without a .yml/.yaml/.json extension are skipped with a note on stderr.
  # Every remaining file is converted to compact JSON with yq and merged into
  # the running config with jq's "*" operator; the "artists" values of both
  # sides are added together instead of being overwritten.
  # Prints the merged config as compact JSON on stdout.
  # Exits the current (sub)shell with yq's status when a file cannot be
  # loaded, or through die when the merge itself fails.
  local filepath
  local filebase
  local json
  local code
  local config
  config="{}"
  for filepath in "$CONFIGD_DIR/"*; do
    # With an empty config.d the glob stays unexpanded; skip that literal
    # pattern instead of reporting a bogus file called "*".
    [ -e "$filepath" ] || [ -L "$filepath" ] || continue
    # Parameter expansion instead of basename: no extra process and no
    # option parsing for names that begin with "-".
    filebase="${filepath##*/}"
    if [ -d "$filepath" ]; then
      echo "createConfig: Ignoring $filebase, Is a directory." 1>&2
    elif str_startswith "~" "$filebase" || str_endswith "~" "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a temporary file!" 1>&2
    elif str_startswith "." "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a hidden file!" 1>&2
    elif str_endswith ".yml" "$filebase" || str_endswith ".yaml" "$filebase" || str_endswith ".json" "$filebase"; then
      json="$(yq . "$filepath" -c)"
      code="$?"
      if [ "$code" -eq 0 ]; then
        config="$(jq --argjson a "$config" --argjson b "$json" \
          '$a * $b * {
            artists: ($a.artists + $b.artists)
          }' -nc)" || die "createConfig: Failed to merge $filebase with in-memory config. Exited with code $?"
      else
        echo "createConfig: Failed to load $filebase. Exited with code $code" 1>&2
        exit "$code"
      fi
    else
      echo "createConfig: Ignoring $filebase." 1>&2
    fi
  done
  jq . -c <<< "$config"
}
# Merged configuration as compact JSON; everything below is derived from it.
CONFIG="$(createConfig)" || die "main: Failed to load config.d! Exited with $?"

# Boolean switches: "1" when `jq -e` accepts the key's value (jq -e exits
# non-zero for false and null), empty otherwise.
SLOW_MODE=""
if jq -e .slow_mode 1>/dev/null <<< "$CONFIG"; then
  SLOW_MODE="1"
fi
STALE_MODE=""
if jq -e .stale_mode 1>/dev/null <<< "$CONFIG"; then
  STALE_MODE="1"
fi
CACHE_ENTRY_FILES=""
if jq -e .cache_entry_files 1>/dev/null <<< "$CONFIG"; then
  CACHE_ENTRY_FILES="1"
fi

# Plain values; a key missing from the config comes back as the text "null".
DEPTH="$(jq -r .depth <<< "$CONFIG")"
BASE_URL="$(jq -r .base_url <<< "$CONFIG")"
PIXIV_BASE_URL="$(jq -r .pixiv_base_url <<< "$CONFIG")"
SESSION_ID="$(jq -r .session_id <<< "$CONFIG")"
USER_AGENT="$(jq -r .user_agent <<< "$CONFIG")"
# Cookie header sent with every request to pixiv.
COOKIES="Cookie: PHPSESSID=${SESSION_ID}"

# Mandatory settings.
if [ "$BASE_URL" == "null" ]; then
  die "main: base_url cannot be null!"
fi
if [ "$PIXIV_BASE_URL" == "null" ]; then
  die "main: pixiv_base_url cannot be null!"
fi
if [ "$SESSION_ID" == "null" ]; then
  die "main: session_id cannot be null!"
fi
if [ -n "$STALE_MODE" ]; then
  echo "main: Stale mode is on. New posts by artists will not be fetched!" 1>&2
fi
arrayIndexList () {
  # arrayIndexList: array
  # Prints the 1-based positions 1..N, one per line, where N is what jq's
  # `length` reports for the JSON value passed as $1. Prints nothing when jq
  # produces no output. Callers subtract 1 to obtain jq indices.
  local count
  count="$(jq -r 'length' <<< "$1")"
  # No length available: nothing to enumerate.
  [ -z "$count" ] && return 0
  seq 1 "$count"
}
getArtworkData () {
  # getArtworkData: artwork_id artist_id
  # Prints the response of https://www.pixiv.net/ajax/illust/<artwork_id>/pages
  # on stdout. The response is cached in
  # $CACHE_DIR/artwork/<artwork_id>.pages.json and served from there on later
  # calls. artist_id is only used in log messages.
  # Returns non-zero, and caches nothing, when the download fails.
  local json_file
  local tmp_file
  json_file="$CACHE_DIR/artwork/$1.pages.json"
  tmp_file="$json_file~"
  if [ -f "$json_file" ]; then
    echo "getArtworkData: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkData: Cache MISS for $2:$1" 1>&2
    # Download under a temporary name and only move it into place on success;
    # otherwise a failed request would leave an empty file behind that every
    # later run treats as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1/pages" 2>/dev/null > "$tmp_file"; then
      rm -f -- "$tmp_file"
      echo "getArtworkData: Download failed for $2:$1" 1>&2
      return 1
    fi
    mv -- "$tmp_file" "$json_file" || return 1
  fi
  cat "$json_file"
}
getArtworkInfo () {
  # getArtworkInfo: artwork_id artist_id
  # Prints the response of https://www.pixiv.net/ajax/illust/<artwork_id> on
  # stdout. The response is cached in
  # $CACHE_DIR/artwork/<artwork_id>.info.json and served from there on later
  # calls. artist_id is only used in log messages.
  # Returns non-zero, and caches nothing, when the download fails.
  local json_file
  local tmp_file
  json_file="$CACHE_DIR/artwork/$1.info.json"
  tmp_file="$json_file~"
  if [ -f "$json_file" ]; then
    echo "getArtworkInfo: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkInfo: Cache MISS for $2:$1" 1>&2
    # Download under a temporary name and only move it into place on success;
    # otherwise a failed request would leave an empty file behind that every
    # later run treats as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1" 2>/dev/null > "$tmp_file"; then
      rm -f -- "$tmp_file"
      echo "getArtworkInfo: Download failed for $2:$1" 1>&2
      return 1
    fi
    mv -- "$tmp_file" "$json_file" || return 1
  fi
  cat "$json_file"
}
getArtistInfo () {
  # getArtistInfo: artist_id
  # Prints the response of https://www.pixiv.net/ajax/user/<artist_id> on
  # stdout. The response is cached in $CACHE_DIR/users/<artist_id>.info.json
  # and served from there on later calls.
  # Returns non-zero, and caches nothing, when the download fails.
  local json_file
  local tmp_file
  json_file="$CACHE_DIR/users/$1.info.json"
  tmp_file="$json_file~"
  if [ -f "$json_file" ]; then
    echo "getArtistInfo: Cache HIT for $1" 1>&2
  else
    echo "getArtistInfo: Cache MISS for $1" 1>&2
    # Download under a temporary name and only move it into place on success;
    # otherwise a failed request would leave an empty file behind that every
    # later run treats as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1" 2>/dev/null > "$tmp_file"; then
      rm -f -- "$tmp_file"
      echo "getArtistInfo: Download failed for $1" 1>&2
      return 1
    fi
    mv -- "$tmp_file" "$json_file" || return 1
  fi
  cat "$json_file"
}
hoistArtworkPage () {
  # hoistArtworkPage: url artwork_id artist_id
  # Makes sure the file behind url is available locally as
  # $CACHE_DIR/pages/<artist_id>/<artwork_id>/<basename of url>, downloading
  # it when it is not there yet.
  # Returns 0 when the file is present afterwards and 1 when the directory
  # could not be created or the download failed; no file is left behind on
  # failure.
  local artist
  local pages
  local page
  artist="$CACHE_DIR/pages/$3"
  pages="$artist/$2"
  page="$pages/$(basename -- "$1")"
  mkdir -p -- "$pages" || return 1
  if [ -f "$page" ]; then
    echo "hoistArtworkPage: Cache HIT for $3:$2 $1" 1>&2
    return 0
  fi
  echo "hoistArtworkPage: Cache MISS for $3:$2 $1" 1>&2
  # Download under a temporary name and promote it only when curl succeeded,
  # so an aborted or failed transfer is never mistaken for a cached page.
  if curl -sf \
    -A "$USER_AGENT" \
    -H "$COOKIES" \
    -H "Referer: https://www.pixiv.net/" \
    "$1" 2>/dev/null > "$page~" && mv -- "$page~" "$page"; then
    return 0
  fi
  rm -f -- "$page~"
  return 1
}
hoistArtworkPages () {
  # hoistArtworkPages: response artwork_id artist_id
  # Walks the .body array of the pages response and calls hoistArtworkPage
  # for the .urls.original of every element. A page that fails to download is
  # reported on stderr and skipped; the remaining pages are still processed.
  # Returns 0 after the loop, 1 or 2 when jq fails to extract a page or its
  # URL, and 3 (after dumping the offending page JSON to stderr) when a page
  # has no .urls.original.
  local page_index
  local page_json
  local page_url
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$1")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$1")" || return 1
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    if [ "$page_url" == "null" ]; then
      # Diagnostic dump; goes to stderr like every other diagnostic here.
      printf '%s\n' "$page_json" 1>&2
      return 3
    fi
    if ! hoistArtworkPage "$page_url" "$2" "$3"; then
      echo "hoistArtworkPages: Failed for page $page_index." 1>&2
      # -f: the file usually does not exist after a failed download, and that
      # must not produce a second, misleading error.
      rm -f -- "$CACHE_DIR/pages/$3/$2/$(basename -- "$page_url")"
    fi
    if [ -n "$SLOW_MODE" ]; then sleep .3; fi
  done
  return 0
}
hoistArtworkEntry () {
  # hoistArtworkEntry: artwork_id artist_id artwork_info_json artwork_pages_json
  # Writes an <entry> element for one artwork to
  # $CACHE_DIR/artwork/<artwork_id>.entry.xml. Title, link, dates, tags and
  # author come from artwork_info_json; one linked <img> per element of
  # artwork_pages_json's .body points at
  # $BASE_URL/pages/<artist_id>/<artwork_id>/<basename of the original URL>.
  # When CACHE_ENTRY_FILES is set and the entry file already exists it is
  # kept as is and 0 is returned. Returns 2 when jq fails on a page.
  local file
  local id
  local href
  local pages
  local page_index
  local page_json
  local page_url
  local page_file
  local tags
  file="$CACHE_DIR/artwork/$1.entry.xml"
  [ -n "$CACHE_ENTRY_FILES" ] && [ -f "$file" ] && return 0
  id="$PIXIV_BASE_URL/artworks/$(jq '.body.illustId | @uri' -r <<< "$3")"
  href="$(jq '.body.extraData.meta.canonical | @html' -r <<< "$3")"
  # The base URL is handed to jq as a variable instead of being spliced into
  # the program text, so quotes or backslashes in it cannot break the filter.
  tags="$(jq --arg base "$PIXIV_BASE_URL" '.body.tags.tags | map("<a href=\"" + $base + "/tags/" + (.tag | @uri) + "\" title=\"" + (.romaji | @html) + " (" + (.translation?.en | @html) + ")\">" + (.tag | @html) + "</a>") | join(", ")' -r <<< "$3")"
  pages=""
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$4")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$4")" || return 2
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    page_file="$(jq @html -Rr <<< "$BASE_URL/pages/$2/$1/$(basename "$page_url")")"
    # The literal newline before the closing quote is part of the markup.
    pages="$pages"'<p><a download="" href="'"$page_file"'"><img src="'"$page_file"'" /></a></p>
'
  done
  echo '<entry>
<title>'"$(jq '.body.illustTitle | @html' -r <<< "$3")"'</title>
<link href="'"$href"'"/>
<link rel="alternate" type="text/html" href="'"$href"'"/>
<id>'"$id"'</id>
<published>'"$(jq '.body.uploadDate | @html' -r <<< "$3")"'</published>
<updated>'"$(date -Is | jq @html -rR)"'</updated>
<content type="xhtml" xml:base="'"$href"'">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>'"$tags"'</p>
'"$pages"'</div>
</content>
<author><name>'"$(jq '.body.userName | @html' -r <<< "$3")"'</name></author>
</entry>' > "$file"
  # <summary>'"$(jq '.body.alt | @html' -r <<< "$3")"'</summary>
}
ingestArtist () {
  # ingestArtist: artist_id
  # Builds the Atom feed for one artist at $CACHE_DIR/feeds/<artist_id>.xml.
  # Steps: fetch the artist's latest works (reusing the cached response when
  # STALE_MODE is set and the cache file exists), fetch the artist info, then
  # for the newest $DEPTH artwork IDs fetch info and pages, download the page
  # images and generate one <entry> each. The feed is assembled under a
  # temporary name and moved into place at the end.
  # Returns 1 when the works list or artist info cannot be fetched or pixiv
  # reports an error, 2-4 when an artwork ID, its info or its pages cannot be
  # obtained. Calls die when an artwork ID is not purely numerical.
  # A failure inside hoistArtworkPages or hoistArtworkEntry is logged and the
  # affected artwork is skipped.
  local artist_file
  local artist_json
  local artist_info_json
  local artwork_keys
  local artwork_id
  local artwork_index
  local artwork_pages_json
  local artwork_info_json
  local feed_url
  local feed_file
  local entries_file
  local artist_url
  local artist_name
  artist_file="$CACHE_DIR/users/$1.json"
  artist_url="$PIXIV_BASE_URL/users/$1"
  feed_url="$BASE_URL/feeds/$1.xml"
  feed_file="$CACHE_DIR/feeds/$1.xml"
  entries_file="$CACHE_DIR/feeds/$1.entries.xml~"
  if [ -n "$STALE_MODE" ] && [ -f "$artist_file" ]; then
    echo "ingestArtist: Cache HIT for $1" 1>&2
  else
    echo "ingestArtist: Cache MISS for $1" 1>&2
    # Fetch under a temporary name so a failed request neither truncates the
    # previous response nor gets mistaken for a valid (empty) works list.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1/works/latest?lang=en" 2>/dev/null > "$artist_file~"; then
      rm -f -- "$artist_file~"
      echo "ingestArtist: Failed to fetch latest works for artist $1" 1>&2
      return 1
    fi
    mv -- "$artist_file~" "$artist_file" || return 1
  fi
  artist_info_json="$(getArtistInfo "$1")" || return 1
  artist_json="$(cat "$artist_file")"
  if jq -e '.error' 1>/dev/null <<< "$artist_info_json"; then
    echo "ingestArtist: Error while reading artist $1 info. Message: $(jq .message -r <<< "$artist_info_json")" 1>&2
    return 1
  fi
  if jq -e '.error' 1>/dev/null <<< "$artist_json"; then
    echo "ingestArtist: Error while reading artist $1. Message: $(jq .message -r <<< "$artist_json")" 1>&2
    return 1
  fi
  # Newest first: the keys of .body.illusts, reversed and cut to $DEPTH.
  artwork_keys="$(jq ".body.illusts | keys | reverse | .[0:$DEPTH]" -c <<< "$artist_json")"
  # Start from an empty entries file: leftovers of an aborted run must not
  # leak into this feed, and the file has to exist even without artworks.
  : > "$entries_file" || return 1
  for artwork_index in $(arrayIndexList "$artwork_keys"); do
    artwork_id="$(jq ".[$((artwork_index-1))]" -r <<< "$artwork_keys")" || return 2
    # Validate before the ID is used in any URL or file path.
    [[ "$artwork_id" =~ ^[0-9]+$ ]] || \
      die "ingestArtist: Exception while handling artist $1, artwork with index of $artwork_index. Illustration ID is not numerical!"
    artwork_info_json="$(getArtworkInfo "$artwork_id" "$1")" || return 3
    artwork_pages_json="$(getArtworkData "$artwork_id" "$1")" || return 4
    [ -n "$SLOW_MODE" ] && sleep .3
    hoistArtworkPages "$artwork_pages_json" "$artwork_id" "$1" || echo "ingestArtist: Error while running hoistArtworkPages. Exit code $?" 1>&2
    if hoistArtworkEntry "$artwork_id" "$1" "$artwork_info_json" "$artwork_pages_json"; then
      cat "$CACHE_DIR/artwork/$artwork_id.entry.xml" >> "$entries_file"
    else
      echo "ingestArtist: Error while running hoistArtworkEntry. Exit code $?" 1>&2
    fi
  done
  artist_name="$(jq '.body.name | @html' -r <<< "$artist_info_json")"
  echo '<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>'"$artist_name"'</title>
<subtitle>'"$artist_name on Pixiv"'</subtitle>
<link href="'"$feed_url"'" rel="self" type="application/atom+xml"/>
<link href="'"$artist_url"'" rel="alternate"/>
<id>'"$artist_url"'</id>
<updated>'"$(date -Is)"'</updated>
<author><name>'"$artist_name"'</name></author>
' > "$feed_file~"
  cat "$entries_file" >> "$feed_file~"
  rm "$entries_file"
  echo '
</feed>' >> "$feed_file~"
  mv "$feed_file~" "$feed_file"
}
init () {
  # init:
  # Runs ingestArtist for every element of .artists in $CONFIG, in order.
  # Terminates the script through die when an artist ID is not purely
  # numerical or when ingestArtist fails. With SLOW_MODE set, pauses one
  # second after each artist. Returns 1 when jq cannot read an artist ID.
  # NOTE: the loop variable must stay named artist_id; ingestArtist reads it
  # through bash's dynamic scoping.
  local artists_index
  local artist_id
  for artists_index in $(arrayIndexList "$(jq '.artists' -c <<< "$CONFIG")"); do
    artist_id="$(jq ".artists[$((artists_index-1))]" -r <<< "$CONFIG")" || return 1
    if ! grep -Eq '^[0-9]+$' <<< "$artist_id"; then
      die "init: Exception while handling artist $artist_id. Artist ID is not numerical!"
    fi
    # Kept as "||" so that $? in the message is ingestArtist's exit status.
    ingestArtist "$artist_id" || die "init[$artist_id]: Unexpected exit code when processing artist. $?"
    if [ -n "$SLOW_MODE" ]; then
      sleep 1
    fi
  done
  echo "init: End"
}
# Entry point: process every configured artist.
init