375 lines
12 KiB
Bash
Executable File
375 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
# Builds Atom feeds for the Pixiv artists listed in config.d/, keeping API
# responses, artwork pages and generated feeds below the cache/ directory.
#
# Environment:
#   UORSS_DIR - directory holding config.d/ and cache/. When unset or empty
#               the working directory is used instead (see below).

VERSION="1.0.0"

# Expand with an explicit empty default so this read stays valid no matter
# where `set -u` ends up relative to it.
CONFIG_DIR="${UORSS_DIR:-}"

set -u  # treat expansion of unset variables as an error
set +H  # turn off `!` history expansion

# This fallback will be removed, don't rely on it.
if [ -z "$CONFIG_DIR" ]; then
  # Warnings go to stderr, like the other warnings this script prints.
  echo "main: UORSS_DIR is not defined, assuming working directory. This is potentially unsafe" 1>&2
  CONFIG_DIR="$PWD"
fi

CONFIGD_DIR="$CONFIG_DIR/config.d"
CACHE_DIR="$CONFIG_DIR/cache"

echo "main: Running as $(whoami). Version $VERSION. Directory at $CONFIG_DIR"
|
|
# die: message...
# Prints the message (all arguments joined by spaces) to stderr and terminates
# the current shell with exit code 1.
die () {
  # printf instead of echo: a message such as "-n" must be printed, not
  # interpreted as an echo option.
  printf '%s\n' "$*" 1>&2
  exit 1
}
|
|
# stripCtrlChars: (filter, reads stdin)
# Copies stdin to stdout, rewriting every ESC byte (0x1b) as the four literal
# characters \x1b. The exit status is that of sed.
stripCtrlChars () {
  sed -e 's/\x1b/\\x1b/g' /dev/stdin
}
|
|
|
|
# The uorss directory and its config.d/ must already exist; they are never
# created on the user's behalf.
[ -d "$CONFIG_DIR" ] || die "main: No uorss directory defined! See readme."
[ -d "$CONFIGD_DIR" ] || die "main: No config.d directory defined! See readme."

# The functions below write into these cache subdirectories without creating
# them, so make sure they exist up front. `:?` aborts instead of touching the
# filesystem root should CACHE_DIR ever be empty.
mkdir -p -- "${CACHE_DIR:?}/artwork" "$CACHE_DIR/users" "$CACHE_DIR/pages" "$CACHE_DIR/feeds" \
  || die "main: Could not create the cache directories below $CACHE_DIR"
|
|
|
|
# str_startswith: prefix string
# Returns 0 when string begins with prefix, 1 otherwise. An empty prefix
# matches every string.
str_startswith () {
  # The quoted "$1" is compared literally (glob characters in the prefix are
  # not special); only the trailing * is a wildcard.
  [[ "$2" == "$1"* ]]
}
|
|
|
|
# str_endswith: affix string
# Returns 0 when string ends with affix, 1 otherwise. An empty affix matches
# every string.
str_endswith () {
  # The quoted "$1" is compared literally (glob characters in the affix are
  # not special); only the leading * is a wildcard.
  [[ "$2" == *"$1" ]]
}
|
|
|
|
|
|
# createConfig:
# Merges all configuration files in config.d/ and prints the result as one
# compact JSON object on stdout.
#
# Only *.yml, *.yaml and *.json files are loaded (through yq). Directories,
# temporary files (name starts or ends with "~"), hidden files and anything
# else are skipped with a note on stderr. Objects are merged recursively in
# glob order; the "artists" lists of all files are concatenated rather than
# replaced.
#
# Terminates the calling shell when a file cannot be parsed (with yq's exit
# code) or merged (via die).
createConfig () {
  local filepath
  local filebase
  local json
  local code
  local config
  config="{}"

  for filepath in "$CONFIGD_DIR/"*; do
    # With an empty config.d the glob stays unexpanded; do not treat the
    # literal "*" as a file. Dangling symlinks are still passed on so that
    # they fail loudly below.
    [ -e "$filepath" ] || [ -L "$filepath" ] || continue

    filebase="$(basename "$filepath")"
    if [ -d "$filepath" ]; then
      echo "createConfig: Ignoring $filebase, Is a directory." 1>&2
    elif str_startswith "~" "$filebase" || str_endswith "~" "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a temporary file!" 1>&2
    elif str_startswith "." "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a hidden file!" 1>&2
    elif str_endswith ".yml" "$filebase" || str_endswith ".yaml" "$filebase" || str_endswith ".json" "$filebase"; then
      json="$(yq . "$filepath" -c)"
      code="$?"
      if [ "$code" -eq 0 ]; then
        # `*` merges objects recursively; the trailing object then replaces
        # "artists" with the concatenation of both lists.
        config="$(jq --argjson a "$config" --argjson b "$json" \
          '$a * $b * {
            artists: ($a.artists + $b.artists)
          }' -nc)" || die "createConfig: Failed to merge $filebase with in-memory config. Exited with code $?"
      else
        echo "createConfig: Failed to load $filebase. Exited with code $code" 1>&2
        exit "$code"
      fi
    else
      echo "createConfig: Ignoring $filebase." 1>&2
    fi
  done

  jq . -c <<< "$config"
}
|
|
|
|
# Build the merged configuration once; everything below reads from it.
CONFIG="$(createConfig)" || die "main: Failed to load config.d! Exited with $?"

# Boolean switches: "1" when the key is present and truthy (neither false nor
# null, per `jq -e`), empty otherwise.
SLOW_MODE=""
if jq -e .slow_mode 1>/dev/null <<< "$CONFIG"; then
  SLOW_MODE="1"
fi

STALE_MODE=""
if jq -e .stale_mode 1>/dev/null <<< "$CONFIG"; then
  STALE_MODE="1"
fi

CACHE_ENTRY_FILES=""
if jq -e .cache_entry_files 1>/dev/null <<< "$CONFIG"; then
  CACHE_ENTRY_FILES="1"
fi

# Plain values; a missing key yields the literal string "null".
DEPTH="$(jq -r .depth <<< "$CONFIG")"
BASE_URL="$(jq -r .base_url <<< "$CONFIG")"
PIXIV_BASE_URL="$(jq -r .pixiv_base_url <<< "$CONFIG")"
SESSION_ID="$(jq -r .session_id <<< "$CONFIG")"
USER_AGENT="$(jq -r .user_agent <<< "$CONFIG")"
COOKIES="Cookie: PHPSESSID=$SESSION_ID"

# These three settings are mandatory.
if [ "$BASE_URL" == "null" ]; then
  die "main: base_url cannot be null!"
fi
if [ "$PIXIV_BASE_URL" == "null" ]; then
  die "main: pixiv_base_url cannot be null!"
fi
if [ "$SESSION_ID" == "null" ]; then
  die "main: session_id cannot be null!"
fi

if [ -n "$STALE_MODE" ]; then
  echo "main: Stale mode is on. New posts by artists will not be fetched!" 1>&2
fi
|
|
|
|
# arrayIndexList: array
# Prints the 1-based indices of a JSON array, one per line (1..length), so
# callers can loop over it with `for i in $(arrayIndexList "$json")`.
# Prints nothing when the array is empty or when jq does not report a plain
# non-negative integer length (e.g. invalid or empty input). Always returns 0.
arrayIndexList () {
  local length
  local index

  length="$(jq 'length' -r <<< "$1")"

  # Anything but a bare integer means there is nothing to enumerate.
  [[ "$length" =~ ^[0-9]+$ ]] || return 0

  # Counted in the shell instead of `seq 1 "$length"`: some seq
  # implementations count downwards ("1 0") when the length is 0.
  for ((index = 1; index <= length; index++)); do
    printf '%s\n' "$index"
  done
  return 0
}
|
|
|
|
# getArtworkData: artwork_id artist_id
# Prints the page list JSON of an artwork on stdout, served from
# $CACHE_DIR/artwork/<artwork_id>.pages.json when present and downloaded from
# the Pixiv ajax API otherwise. artist_id is only used in log messages.
# Returns non-zero when the download fails; nothing is cached in that case.
getArtworkData () {
  local json_file
  json_file="$CACHE_DIR/artwork/$1.pages.json"

  if [ -f "$json_file" ]; then
    echo "getArtworkData: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkData: Cache MISS for $2:$1" 1>&2
    # Download next to the final file and only move it into place on success,
    # so a failed request never leaves an empty file that later runs would
    # treat as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1/pages" 2>/dev/null > "$json_file~"; then
      echo "getArtworkData: Download failed for $2:$1" 1>&2
      rm -f -- "$json_file~"
      return 1
    fi
    mv -- "$json_file~" "$json_file" || return 1
  fi
  cat "$json_file"
}
|
|
|
|
# getArtworkInfo: artwork_id artist_id
# Prints the metadata JSON of an artwork on stdout, served from
# $CACHE_DIR/artwork/<artwork_id>.info.json when present and downloaded from
# the Pixiv ajax API otherwise. artist_id is only used in log messages.
# Returns non-zero when the download fails; nothing is cached in that case.
getArtworkInfo () {
  local json_file
  json_file="$CACHE_DIR/artwork/$1.info.json"

  if [ -f "$json_file" ]; then
    echo "getArtworkInfo: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkInfo: Cache MISS for $2:$1" 1>&2
    # Download next to the final file and only move it into place on success,
    # so a failed request never leaves an empty file that later runs would
    # treat as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1" 2>/dev/null > "$json_file~"; then
      echo "getArtworkInfo: Download failed for $2:$1" 1>&2
      rm -f -- "$json_file~"
      return 1
    fi
    mv -- "$json_file~" "$json_file" || return 1
  fi
  cat "$json_file"
}
|
|
|
|
# getArtistInfo: artist_id
# Prints the profile JSON of an artist on stdout, served from
# $CACHE_DIR/users/<artist_id>.info.json when present and downloaded from the
# Pixiv ajax API otherwise.
# Returns non-zero when the download fails; nothing is cached in that case.
getArtistInfo () {
  local json_file
  json_file="$CACHE_DIR/users/$1.info.json"

  if [ -f "$json_file" ]; then
    echo "getArtistInfo: Cache HIT for $1" 1>&2
  else
    echo "getArtistInfo: Cache MISS for $1" 1>&2
    # Download next to the final file and only move it into place on success,
    # so a failed request never leaves an empty file that later runs would
    # treat as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1" 2>/dev/null > "$json_file~"; then
      echo "getArtistInfo: Download failed for $1" 1>&2
      rm -f -- "$json_file~"
      return 1
    fi
    mv -- "$json_file~" "$json_file" || return 1
  fi
  cat "$json_file"
}
|
|
|
|
# hoistArtworkPage: url artwork_id artist_id
# Makes sure one artwork page is stored at
# $CACHE_DIR/pages/<artist_id>/<artwork_id>/<basename of url>, downloading it
# when it is not there yet. Prints nothing on stdout.
# Returns 0 when the file is (now) present, non-zero when the target directory
# cannot be created or the download fails; a failed download leaves no file
# behind.
hoistArtworkPage () {
  local pages
  local page
  pages="$CACHE_DIR/pages/$3/$2"
  page="$pages/$(basename "$1")"

  # Creates the per-artist and the per-artwork directory in one go.
  mkdir -p -- "$pages" || return 1

  if [ -f "$page" ]; then
    echo "hoistArtworkPage: Cache HIT for $3:$2 $1" 1>&2
    return 0
  fi

  echo "hoistArtworkPage: Cache MISS for $3:$2 $1" 1>&2
  # Download to a temporary name first; the page only becomes visible in the
  # cache once the transfer succeeded.
  if ! curl -sf \
    -A "$USER_AGENT" \
    -H "$COOKIES" \
    -H "Referer: https://www.pixiv.net/" \
    "$1" 2>/dev/null > "$page~"; then
    rm -f -- "$page~"
    return 1
  fi
  mv -- "$page~" "$page"
}
|
|
|
|
# hoistArtworkPages: response artwork_id artist_id
# Walks the page list in response (.body[]) and stores every page's original
# image in the cache through hoistArtworkPage. A page that fails to download
# is logged and skipped; the remaining pages are still processed.
# Returns 0 after the walk, 1 or 2 when jq cannot read a page, 3 when a page
# has no .urls.original.
hoistArtworkPages () {
  local page_index
  local page_json
  local page_url
  local page_file

  for page_index in $(arrayIndexList "$(jq .body -c <<< "$1")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$1")" || return 1
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    if [ "$page_url" == "null" ]; then
      # Diagnostic dump of the offending page; belongs on stderr.
      echo "hoistArtworkPages: Page $page_index has no original URL: $page_json" 1>&2
      return 3
    fi

    if ! hoistArtworkPage "$page_url" "$2" "$3"; then
      echo "hoistArtworkPages: Failed for page $page_index." 1>&2
      # Drop whatever the failed attempt left behind: the page itself and the
      # "~" download file. -f because either of them may not exist.
      page_file="$CACHE_DIR/pages/$3/$2/$(basename "$page_url")"
      rm -f -- "$page_file" "$page_file~"
    fi

    if [ -n "$SLOW_MODE" ]; then sleep .3; fi
  done
  return 0
}
|
|
|
|
# hoistArtworkEntry: artwork_id artist_id artwork_info_json artwork_pages_json
# Writes the Atom <entry> element for one artwork to
# $CACHE_DIR/artwork/<artwork_id>.entry.xml. The entry links to the artwork,
# lists its tags and embeds every page as an image served from
# $BASE_URL/pages/<artist_id>/<artwork_id>/.
# With CACHE_ENTRY_FILES set, an existing entry file is kept as is.
# Returns 2 when a page cannot be read from artwork_pages_json.
hoistArtworkEntry () {
  local file
  local id
  local href
  local pages
  local page_index
  local page_json
  local page_url
  local page_file
  local tags
  file="$CACHE_DIR/artwork/$1.entry.xml"
  [ -n "$CACHE_ENTRY_FILES" ] && [ -f "$file" ] && return 0
  id="$PIXIV_BASE_URL/artworks/$(jq '.body.illustId | @uri' -r <<< "$3")"
  href="$(jq '.body.extraData.meta.canonical | @html' -r <<< "$3")"
  # The base URL is handed to jq as a variable instead of being pasted into
  # the program text, so quotes or backslashes in it cannot break the filter.
  tags="$(jq -r --arg base "$PIXIV_BASE_URL" \
    '.body.tags.tags | map("<a href=\"" + $base + "/tags/" + (.tag | @uri) + "\" title=\"" + (.romaji | @html) + " (" + (.translation?.en | @html) + ")\">" + (.tag | @html) + "</a>") | join(", ")' \
    <<< "$3")"
  pages=""

  # One <p><a><img/></a></p> per page, pointing at the locally cached copy.
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$4")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$4")" || return 2
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    page_file="$(jq @html -Rr <<< "$BASE_URL/pages/$2/$1/$(basename "$page_url")")"
    pages="$pages"'<p><a download="" href="'"$page_file"'"><img src="'"$page_file"'" /></a></p>
'
  done

  echo '<entry>
<title>'"$(jq '.body.illustTitle | @html' -r <<< "$3")"'</title>
<link href="'"$href"'"/>
<link rel="alternate" type="text/html" href="'"$href"'"/>
<id>'"$id"'</id>
<published>'"$(jq '.body.uploadDate | @html' -r <<< "$3")"'</published>
<updated>'"$(date -Is | jq @html -rR)"'</updated>
<content type="xhtml" xml:base="'"$href"'">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>'"$tags"'</p>
'"$pages"'</div>
</content>
<author><name>'"$(jq '.body.userName | @html' -r <<< "$3")"'</name></author>
</entry>' > "$file"
  # Currently not emitted:
  # <summary>'"$(jq '.body.alt | @html' -r <<< "$3")"'</summary>
}
|
|
|
|
# ingestArtist: artist_id
# Builds the Atom feed of one artist at $CACHE_DIR/feeds/<artist_id>.xml.
#
# Fetches the artist's latest works listing (reused from
# $CACHE_DIR/users/<artist_id>.json in stale mode), takes the DEPTH newest
# illustrations, caches their pages and entry files, and assembles the feed
# from those entries. The feed is written to a "~" file and moved into place
# at the end.
#
# Returns 1 when the listing or the artist info cannot be obtained or reports
# an error, 2-4 when an artwork cannot be read or fetched. Terminates the
# script (die) when an illustration ID is not numerical.
ingestArtist () {
  local artist_file
  local artist_json
  local artist_info_json
  local artwork_keys
  local artwork_id
  local artwork_index
  local artwork_pages_json
  local artwork_info_json
  local feed_url
  local feed_file
  local entries_file
  local artist_url
  local artist_name
  artist_file="$CACHE_DIR/users/$1.json"
  artist_url="$PIXIV_BASE_URL/users/$1"
  feed_url="$BASE_URL/feeds/$1.xml"
  feed_file="$CACHE_DIR/feeds/$1.xml"
  entries_file="$CACHE_DIR/feeds/$1.entries.xml~"

  if [ -n "$STALE_MODE" ] && [ -f "$artist_file" ]; then
    echo "ingestArtist: Cache HIT for $1" 1>&2
  else
    echo "ingestArtist: Cache MISS for $1" 1>&2
    # Download to a temporary name and only replace the cached listing on
    # success; a failed request must not wipe the previous listing or end up
    # as an empty feed.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1/works/latest?lang=en" 2>/dev/null > "$artist_file~"; then
      echo "ingestArtist: Download failed for $1" 1>&2
      rm -f -- "$artist_file~"
      return 1
    fi
    mv -- "$artist_file~" "$artist_file" || return 1
  fi

  artist_info_json="$(getArtistInfo "$1")" || return 1
  artist_json="$(cat "$artist_file")"

  if jq -e '.error' 1>/dev/null <<< "$artist_info_json"; then
    echo "ingestArtist: Error while reading artist $1 info. Message: $(jq .message -r <<< "$artist_info_json")" 1>&2
    return 1
  fi

  if jq -e '.error' 1>/dev/null <<< "$artist_json"; then
    echo "ingestArtist: Error while reading artist $1. Message: $(jq .message -r <<< "$artist_json")" 1>&2
    return 1
  fi

  # Newest first. The keys are ID strings, so they are ordered by numeric
  # value: plain string order would rank "99999999" above "100000000".
  # Keys that are not numbers sort as 0 and are rejected further down.
  artwork_keys="$(jq --argjson depth "$DEPTH" \
    '.body.illusts | keys | sort_by(tonumber? // 0) | reverse | .[0:$depth]' -c <<< "$artist_json")"

  # Start from an empty entries file: leftovers of an aborted run would be
  # duplicated otherwise, and an artist without artworks still needs the file.
  : > "$entries_file"
  for artwork_index in $(arrayIndexList "$artwork_keys"); do
    artwork_id="$(jq ".[$((artwork_index-1))]" -r <<< "$artwork_keys")" || return 2

    # Validate the ID before it is used in URLs and cache file names.
    grep -Eq '^[0-9]+$' <<< "$artwork_id" || \
      die "ingestArtist: Exception while handling artist $1, artwork with index of $artwork_index. Illustration ID is not numerical!"

    artwork_info_json="$(getArtworkInfo "$artwork_id" "$1")" || return 3
    artwork_pages_json="$(getArtworkData "$artwork_id" "$1")" || return 4
    if [ -n "$SLOW_MODE" ]; then sleep .3; fi

    hoistArtworkPages "$artwork_pages_json" "$artwork_id" "$1" || echo "ingestArtist: Error while running hoistArtworkPages. Exit code $?" 1>&2

    if hoistArtworkEntry "$artwork_id" "$1" "$artwork_info_json" "$artwork_pages_json"; then
      cat "$CACHE_DIR/artwork/$artwork_id.entry.xml" >> "$entries_file"
    else
      echo "ingestArtist: Error while running hoistArtworkEntry. Exit code $?" 1>&2
    fi
  done

  artist_name="$(jq '.body.name | @html' -r <<< "$artist_info_json")"
  echo '<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>'"$artist_name"'</title>
<subtitle>'"$artist_name on Pixiv"'</subtitle>
<link href="'"$feed_url"'" rel="self" type="application/atom+xml"/>
<link href="'"$artist_url"'" rel="alternate"/>
<id>'"$artist_url"'</id>
<updated>'"$(date -Is)"'</updated>
<author><name>'"$artist_name"'</name></author>
' > "$feed_file~"
  cat "$entries_file" >> "$feed_file~"
  rm "$entries_file"
  echo '
</feed>' >> "$feed_file~"
  mv "$feed_file~" "$feed_file"
}
|
|
|
|
# init:
# Runs ingestArtist for every ID in the "artists" list of the configuration,
# in list order. Terminates the script when an ID is not numerical or when an
# artist cannot be processed; returns 1 when an ID cannot be read from the
# configuration. Prints "init: End" once all artists are done.
init () {
  local artist_id
  local artists_index
  local artists_json

  artists_json="$(jq '.artists' -c <<< "$CONFIG")"

  for artists_index in $(arrayIndexList "$artists_json"); do
    if ! artist_id="$(jq -r ".artists[$((artists_index - 1))]" <<< "$CONFIG")"; then
      return 1
    fi

    if ! grep -Eq '^[0-9]+$' <<< "$artist_id"; then
      die "init: Exception while handling artist $artist_id. Artist ID is not numerical!"
    fi

    # Kept as `||` so that $? in the message is ingestArtist's exit code.
    ingestArtist "$artist_id" || die "init[$artist_id]: Unexpected exit code when processing artist. $?"

    # In slow mode, pause between artists.
    if [ -n "$SLOW_MODE" ]; then
      sleep 1
    fi
  done

  echo "init: End"
}
|
|
|
|
# Entry point: process every configured artist.
init
|